<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[193125] trunk/Source/JavaScriptCore</title>
</head>
<body>

<style type="text/css"><!--
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }
#msg dl a { font-weight: bold}
#msg dl a:link    { color:#fc3; }
#msg dl a:active  { color:#ff0; }
#msg dl a:visited { color:#cc6; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
#logmsg ul { text-indent: -1em; padding-left: 1em; }#logmsg ol { text-indent: -1.5em; padding-left: 1.5em; }
#logmsg > ul, #logmsg > ol { margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<div id="msg">
<dl class="meta">
<dt>Revision</dt> <dd><a href="http://trac.webkit.org/projects/webkit/changeset/193125">193125</a></dd>
<dt>Author</dt> <dd>commit-queue@webkit.org</dd>
<dt>Date</dt> <dd>2015-12-03 10:49:04 -0800 (Thu, 03 Dec 2015)</dd>
</dl>

<h3>Log Message</h3>
<pre>[JSC] Add CLZ support to B3
https://bugs.webkit.org/show_bug.cgi?id=151799

Patch by Benjamin Poulain &lt;bpoulain@apple.com&gt; on 2015-12-03
Reviewed by Michael Saboff.

Previously we were counting on LLVM to select LZCNT
when it's available.
Since we have to do that ourselves now, I added feature
detection based on the CPUID. The MacroAssembler just
picks the best available lowering based on the platform.

* assembler/MacroAssemblerX86Common.cpp:
* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::countLeadingZeros32):
(JSC::MacroAssemblerX86Common::supportsLZCNT):
(JSC::MacroAssemblerX86Common::clz32AfterBsr):
* assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::countLeadingZeros64):
(JSC::MacroAssemblerX86_64::clz64AfterBsr):
* assembler/X86Assembler.h:
(JSC::X86Assembler::lzcnt_rr):
(JSC::X86Assembler::lzcnt_mr):
(JSC::X86Assembler::lzcntq_rr):
(JSC::X86Assembler::lzcntq_mr):
(JSC::X86Assembler::bsr_mr):
(JSC::X86Assembler::bsrq_rr):
(JSC::X86Assembler::bsrq_mr):
* b3/B3LowerToAir.cpp:
(JSC::B3::Air::LowerToAir::lower):
* b3/B3Opcode.cpp:
(WTF::printInternal):
* b3/B3Opcode.h:
* b3/B3Validate.cpp:
* b3/B3Value.cpp:
(JSC::B3::Value::effects):
(JSC::B3::Value::key):
(JSC::B3::Value::typeFor):
* b3/air/AirOpcode.opcodes:
* b3/testb3.cpp:
(JSC::B3::countLeadingZero):
(JSC::B3::testClzArg64):
(JSC::B3::testClzMem64):
(JSC::B3::testClzArg32):
(JSC::B3::testClzMem32):
(JSC::B3::doubleOperands):
(JSC::B3::run):
* ftl/FTLB3Output.h:
(JSC::FTL::Output::ctlz32):
* ftl/FTLLowerDFGToLLVM.cpp:
(JSC::FTL::DFG::LowerDFGToLLVM::compileArithClz32):
* ftl/FTLOutput.h:
(JSC::FTL::Output::ctlz32):</pre>

<h3>Modified Paths</h3>
<ul>
<li><a href="#trunkSourceJavaScriptCoreChangeLog">trunk/Source/JavaScriptCore/ChangeLog</a></li>
<li><a href="#trunkSourceJavaScriptCoreassemblerMacroAssemblerX86Commoncpp">trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreassemblerMacroAssemblerX86Commonh">trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h</a></li>
<li><a href="#trunkSourceJavaScriptCoreassemblerMacroAssemblerX86_64h">trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h</a></li>
<li><a href="#trunkSourceJavaScriptCoreassemblerX86Assemblerh">trunk/Source/JavaScriptCore/assembler/X86Assembler.h</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3B3LowerToAircpp">trunk/Source/JavaScriptCore/b3/B3LowerToAir.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3B3Opcodecpp">trunk/Source/JavaScriptCore/b3/B3Opcode.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3B3Opcodeh">trunk/Source/JavaScriptCore/b3/B3Opcode.h</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3B3Validatecpp">trunk/Source/JavaScriptCore/b3/B3Validate.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3B3Valuecpp">trunk/Source/JavaScriptCore/b3/B3Value.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3airAirOpcodeopcodes">trunk/Source/JavaScriptCore/b3/air/AirOpcode.opcodes</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3testb3cpp">trunk/Source/JavaScriptCore/b3/testb3.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreftlFTLB3Outputh">trunk/Source/JavaScriptCore/ftl/FTLB3Output.h</a></li>
<li><a href="#trunkSourceJavaScriptCoreftlFTLLowerDFGToLLVMcpp">trunk/Source/JavaScriptCore/ftl/FTLLowerDFGToLLVM.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreftlFTLOutputh">trunk/Source/JavaScriptCore/ftl/FTLOutput.h</a></li>
</ul>

</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkSourceJavaScriptCoreChangeLog"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/ChangeLog (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/ChangeLog        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/ChangeLog        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -1,3 +1,58 @@
</span><ins>+2015-12-03  Benjamin Poulain  &lt;bpoulain@apple.com&gt;
+
+        [JSC] Add CLZ support to B3
+        https://bugs.webkit.org/show_bug.cgi?id=151799
+
+        Reviewed by Michael Saboff.
+
+        Previously we were counting on LLVM to select LZCNT
+        when it's available.
+        Since we have to do that ourselves now, I added feature
+        detection based on the CPUID. The MacroAssembler just
+        picks the best available lowering based on the platform.
+
+        * assembler/MacroAssemblerX86Common.cpp:
+        * assembler/MacroAssemblerX86Common.h:
+        (JSC::MacroAssemblerX86Common::countLeadingZeros32):
+        (JSC::MacroAssemblerX86Common::supportsLZCNT):
+        (JSC::MacroAssemblerX86Common::clz32AfterBsr):
+        * assembler/MacroAssemblerX86_64.h:
+        (JSC::MacroAssemblerX86_64::countLeadingZeros64):
+        (JSC::MacroAssemblerX86_64::clz64AfterBsr):
+        * assembler/X86Assembler.h:
+        (JSC::X86Assembler::lzcnt_rr):
+        (JSC::X86Assembler::lzcnt_mr):
+        (JSC::X86Assembler::lzcntq_rr):
+        (JSC::X86Assembler::lzcntq_mr):
+        (JSC::X86Assembler::bsr_mr):
+        (JSC::X86Assembler::bsrq_rr):
+        (JSC::X86Assembler::bsrq_mr):
+        * b3/B3LowerToAir.cpp:
+        (JSC::B3::Air::LowerToAir::lower):
+        * b3/B3Opcode.cpp:
+        (WTF::printInternal):
+        * b3/B3Opcode.h:
+        * b3/B3Validate.cpp:
+        * b3/B3Value.cpp:
+        (JSC::B3::Value::effects):
+        (JSC::B3::Value::key):
+        (JSC::B3::Value::typeFor):
+        * b3/air/AirOpcode.opcodes:
+        * b3/testb3.cpp:
+        (JSC::B3::countLeadingZero):
+        (JSC::B3::testClzArg64):
+        (JSC::B3::testClzMem64):
+        (JSC::B3::testClzArg32):
+        (JSC::B3::testClzMem32):
+        (JSC::B3::doubleOperands):
+        (JSC::B3::run):
+        * ftl/FTLB3Output.h:
+        (JSC::FTL::Output::ctlz32):
+        * ftl/FTLLowerDFGToLLVM.cpp:
+        (JSC::FTL::DFG::LowerDFGToLLVM::compileArithClz32):
+        * ftl/FTLOutput.h:
+        (JSC::FTL::Output::ctlz32):
+
</ins><span class="cx"> 2015-12-02  Mark Lam  &lt;mark.lam@apple.com&gt;
</span><span class="cx"> 
</span><span class="cx">         Polymorphic operand types for DFG and FTL mul.
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreassemblerMacroAssemblerX86Commoncpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.cpp (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.cpp        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.cpp        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -552,6 +552,8 @@
</span><span class="cx"> MacroAssemblerX86Common::SSE2CheckState MacroAssemblerX86Common::s_sse2CheckState = NotCheckedSSE2;
</span><span class="cx"> #endif
</span><span class="cx"> 
</span><ins>+MacroAssemblerX86Common::LZCNTCheckState MacroAssemblerX86Common::s_lzcntCheckState = LZCNTCheckState::NotChecked;
+
</ins><span class="cx"> } // namespace JSC
</span><span class="cx"> 
</span><span class="cx"> #endif // ENABLE(ASSEMBLER) &amp;&amp; (CPU(X86) || CPU(X86_64))
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreassemblerMacroAssemblerX86Commonh"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -205,14 +205,22 @@
</span><span class="cx"> 
</span><span class="cx">     void countLeadingZeros32(RegisterID src, RegisterID dst)
</span><span class="cx">     {
</span><ins>+        if (supportsLZCNT()) {
+            m_assembler.lzcnt_rr(src, dst);
+            return;
+        }
</ins><span class="cx">         m_assembler.bsr_rr(src, dst);
</span><del>-        Jump srcIsNonZero = m_assembler.jCC(x86Condition(NonZero));
-        move(TrustedImm32(32), dst);
</del><ins>+        clz32AfterBsr(dst);
+    }
</ins><span class="cx"> 
</span><del>-        Jump skipNonZeroCase = jump();
-        srcIsNonZero.link(this);
-        xor32(TrustedImm32(0x1f), dst);
-        skipNonZeroCase.link(this);
</del><ins>+    void countLeadingZeros32(Address src, RegisterID dst)
+    {
+        if (supportsLZCNT()) {
+            m_assembler.lzcnt_mr(src.offset, src.base, dst);
+            return;
+        }
+        m_assembler.bsr_mr(src.offset, src.base, dst);
+        clz32AfterBsr(dst);
</ins><span class="cx">     }
</span><span class="cx"> 
</span><span class="cx">     void lshift32(RegisterID shift_amount, RegisterID dest)
</span><span class="lines">@@ -1706,6 +1714,31 @@
</span><span class="cx"> #endif
</span><span class="cx">     }
</span><span class="cx">     
</span><ins>+    static bool supportsLZCNT()
+    {
+        if (s_lzcntCheckState == LZCNTCheckState::NotChecked) {
+            int flags = 0;
+#if COMPILER(MSVC)
+            _asm {
+                mov eax, 0x80000001
+                cpuid;
+                mov flags, ecx;
+            }
+#elif COMPILER(GCC_OR_CLANG)
+            asm (
+                &quot;movl $0x80000001, %%eax;&quot;
+                &quot;cpuid;&quot;
+                &quot;movl %%ecx, %0;&quot;
+                : &quot;=g&quot; (flags)
+                :
+                : &quot;%eax&quot;, &quot;%ebx&quot;, &quot;%ecx&quot;, &quot;%edx&quot;
+                );
+#endif
+            s_lzcntCheckState = (flags &amp; 0x20) ? LZCNTCheckState::Set : LZCNTCheckState::Clear;
+        }
+        return s_lzcntCheckState == LZCNTCheckState::Set;
+    }
+
</ins><span class="cx"> private:
</span><span class="cx">     // Only MacroAssemblerX86 should be using the following method; SSE2 is always available on
</span><span class="cx">     // x86_64, and clients &amp; subclasses of MacroAssembler should be using 'supportsFloatingPoint()'.
</span><span class="lines">@@ -1740,6 +1773,19 @@
</span><span class="cx">         m_assembler.addl_im(imm.m_value, address.offset, address.base);
</span><span class="cx">     }
</span><span class="cx"> 
</span><ins>+    // If lzcnt is not available, use this after BSR
+    // to count the leading zeros.
+    void clz32AfterBsr(RegisterID dst)
+    {
+        Jump srcIsNonZero = m_assembler.jCC(x86Condition(NonZero));
+        move(TrustedImm32(32), dst);
+
+        Jump skipNonZeroCase = jump();
+        srcIsNonZero.link(this);
+        xor32(TrustedImm32(0x1f), dst);
+        skipNonZeroCase.link(this);
+    }
+
</ins><span class="cx"> #if CPU(X86)
</span><span class="cx"> #if OS(MAC_OS_X)
</span><span class="cx"> 
</span><span class="lines">@@ -1803,6 +1849,13 @@
</span><span class="cx">     }
</span><span class="cx"> 
</span><span class="cx"> #endif
</span><ins>+
+    enum class LZCNTCheckState {
+        NotChecked,
+        Clear,
+        Set
+    };
+    static LZCNTCheckState s_lzcntCheckState;
</ins><span class="cx"> };
</span><span class="cx"> 
</span><span class="cx"> } // namespace JSC
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreassemblerMacroAssemblerX86_64h"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -334,6 +334,26 @@
</span><span class="cx">         and64(scratchRegister(), srcDest);
</span><span class="cx">     }
</span><span class="cx"> 
</span><ins>+    void countLeadingZeros64(RegisterID src, RegisterID dst)
+    {
+        if (supportsLZCNT()) {
+            m_assembler.lzcntq_rr(src, dst);
+            return;
+        }
+        m_assembler.bsrq_rr(src, dst);
+        clz64AfterBsr(dst);
+    }
+
+    void countLeadingZeros64(Address src, RegisterID dst)
+    {
+        if (supportsLZCNT()) {
+            m_assembler.lzcntq_mr(src.offset, src.base, dst);
+            return;
+        }
+        m_assembler.bsrq_mr(src.offset, src.base, dst);
+        clz64AfterBsr(dst);
+    }
+
</ins><span class="cx">     void lshift64(TrustedImm32 imm, RegisterID dest)
</span><span class="cx">     {
</span><span class="cx">         m_assembler.shlq_i8r(imm.m_value, dest);
</span><span class="lines">@@ -1047,6 +1067,19 @@
</span><span class="cx">         }
</span><span class="cx">     }
</span><span class="cx"> 
</span><ins>+    // If lzcnt is not available, use this after BSR
+    // to count the leading zeros.
+    void clz64AfterBsr(RegisterID dst)
+    {
+        Jump srcIsNonZero = m_assembler.jCC(x86Condition(NonZero));
+        move(TrustedImm32(64), dst);
+
+        Jump skipNonZeroCase = jump();
+        srcIsNonZero.link(this);
+        xor64(TrustedImm32(0x3f), dst);
+        skipNonZeroCase.link(this);
+    }
+
</ins><span class="cx">     friend class LinkBuffer;
</span><span class="cx"> 
</span><span class="cx">     static void linkCall(void* code, Call call, FunctionPtr function)
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreassemblerX86Assemblerh"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/assembler/X86Assembler.h (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/assembler/X86Assembler.h        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/assembler/X86Assembler.h        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -283,6 +283,7 @@
</span><span class="cx">         OP2_IMUL_GvEv       = 0xAF,
</span><span class="cx">         OP2_MOVZX_GvEb      = 0xB6,
</span><span class="cx">         OP2_BSR             = 0xBD,
</span><ins>+        OP2_LZCNT           = 0xBD,
</ins><span class="cx">         OP2_MOVSX_GvEb      = 0xBE,
</span><span class="cx">         OP2_MOVZX_GvEw      = 0xB7,
</span><span class="cx">         OP2_MOVSX_GvEw      = 0xBF,
</span><span class="lines">@@ -860,11 +861,54 @@
</span><span class="cx"> 
</span><span class="cx"> #endif
</span><span class="cx"> 
</span><ins>+    void lzcnt_rr(RegisterID src, RegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_F3);
+        m_formatter.twoByteOp(OP2_LZCNT, dst, src);
+    }
+
+    void lzcnt_mr(int offset, RegisterID base, RegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_F3);
+        m_formatter.twoByteOp(OP2_LZCNT, dst, base, offset);
+    }
+
+#if CPU(X86_64)
+    void lzcntq_rr(RegisterID src, RegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_F3);
+        m_formatter.twoByteOp64(OP2_LZCNT, dst, src);
+    }
+
+    void lzcntq_mr(int offset, RegisterID base, RegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_F3);
+        m_formatter.twoByteOp64(OP2_LZCNT, dst, base, offset);
+    }
+#endif
+
</ins><span class="cx">     void bsr_rr(RegisterID src, RegisterID dst)
</span><span class="cx">     {
</span><span class="cx">         m_formatter.twoByteOp(OP2_BSR, dst, src);
</span><span class="cx">     }
</span><span class="cx"> 
</span><ins>+    void bsr_mr(int offset, RegisterID base, RegisterID dst)
+    {
+        m_formatter.twoByteOp(OP2_BSR, dst, base, offset);
+    }
+
+#if CPU(X86_64)
+    void bsrq_rr(RegisterID src, RegisterID dst)
+    {
+        m_formatter.twoByteOp64(OP2_BSR, dst, src);
+    }
+
+    void bsrq_mr(int offset, RegisterID base, RegisterID dst)
+    {
+        m_formatter.twoByteOp64(OP2_BSR, dst, base, offset);
+    }
+#endif
+
</ins><span class="cx">     void sarl_i8r(int imm, RegisterID dst)
</span><span class="cx">     {
</span><span class="cx">         if (imm == 1)
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3B3LowerToAircpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/B3LowerToAir.cpp (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/B3LowerToAir.cpp        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/b3/B3LowerToAir.cpp        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -1463,6 +1463,11 @@
</span><span class="cx">             return;
</span><span class="cx">         }
</span><span class="cx"> 
</span><ins>+        case Clz: {
+            appendUnOp&lt;CountLeadingZeros32, CountLeadingZeros64, Air::Oops&gt;(m_value-&gt;child(0));
+            return;
+        }
+
</ins><span class="cx">         case Sqrt: {
</span><span class="cx">             appendUnOp&lt;Air::Oops, Air::Oops, SqrtDouble&gt;(m_value-&gt;child(0));
</span><span class="cx">             return;
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3B3Opcodecpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/B3Opcode.cpp (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/B3Opcode.cpp        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/b3/B3Opcode.cpp        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -137,6 +137,9 @@
</span><span class="cx">     case ZShr:
</span><span class="cx">         out.print(&quot;ZShr&quot;);
</span><span class="cx">         return;
</span><ins>+    case Clz:
+        out.print(&quot;Clz&quot;);
+        return;
</ins><span class="cx">     case Sqrt:
</span><span class="cx">         out.print(&quot;Sqrt&quot;);
</span><span class="cx">         return;
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3B3Opcodeh"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/B3Opcode.h (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/B3Opcode.h        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/b3/B3Opcode.h        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -80,6 +80,7 @@
</span><span class="cx">     Shl,
</span><span class="cx">     SShr, // Arithmetic Shift.
</span><span class="cx">     ZShr, // Logical Shift.
</span><ins>+    Clz, // Count leading zeros.
</ins><span class="cx"> 
</span><span class="cx">     // Double math.
</span><span class="cx">     Sqrt,
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3B3Validatecpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/B3Validate.cpp (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/B3Validate.cpp        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/b3/B3Validate.cpp        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -192,6 +192,11 @@
</span><span class="cx">                 VALIDATE(value-&gt;child(0)-&gt;type() == Int32, (&quot;At &quot;, *value));
</span><span class="cx">                 VALIDATE(value-&gt;type() == Int64, (&quot;At &quot;, *value));
</span><span class="cx">                 break;
</span><ins>+            case Clz:
+                VALIDATE(value-&gt;numChildren() == 1, (&quot;At &quot;, *value));
+                VALIDATE(isInt(value-&gt;child(0)-&gt;type()), (&quot;At &quot;, *value));
+                VALIDATE(isInt(value-&gt;type()), (&quot;At &quot;, *value));
+                break;
</ins><span class="cx">             case Trunc:
</span><span class="cx">                 VALIDATE(value-&gt;numChildren() == 1, (&quot;At &quot;, *value));
</span><span class="cx">                 VALIDATE(value-&gt;child(0)-&gt;type() == Int64, (&quot;At &quot;, *value));
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3B3Valuecpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/B3Value.cpp (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/B3Value.cpp        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/b3/B3Value.cpp        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -325,6 +325,7 @@
</span><span class="cx">     case Shl:
</span><span class="cx">     case SShr:
</span><span class="cx">     case ZShr:
</span><ins>+    case Clz:
</ins><span class="cx">     case Sqrt:
</span><span class="cx">     case BitwiseCast:
</span><span class="cx">     case SExt8:
</span><span class="lines">@@ -406,6 +407,7 @@
</span><span class="cx">     case SExt16:
</span><span class="cx">     case SExt32:
</span><span class="cx">     case ZExt32:
</span><ins>+    case Clz:
</ins><span class="cx">     case Trunc:
</span><span class="cx">     case FRound:
</span><span class="cx">     case IToD:
</span><span class="lines">@@ -498,6 +500,7 @@
</span><span class="cx">     case Shl:
</span><span class="cx">     case SShr:
</span><span class="cx">     case ZShr:
</span><ins>+    case Clz:
</ins><span class="cx">     case Sqrt:
</span><span class="cx">     case CheckAdd:
</span><span class="cx">     case CheckSub:
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3airAirOpcodeopcodes"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/air/AirOpcode.opcodes (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/air/AirOpcode.opcodes        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/b3/air/AirOpcode.opcodes        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -214,6 +214,14 @@
</span><span class="cx"> ConvertInt64ToDouble U:G, D:F
</span><span class="cx">     Tmp, Tmp
</span><span class="cx"> 
</span><ins>+CountLeadingZeros32 U:G, D:G
+    Tmp, Tmp
+    Addr, Tmp
+
+CountLeadingZeros64 U:G, D:G
+    Tmp, Tmp
+    Addr, Tmp
+
</ins><span class="cx"> # Note that Move operates over the full register size, which is either 32-bit or 64-bit depending on
</span><span class="cx"> # the platform. I'm not entirely sure that this is a good thing; it might be better to just have a
</span><span class="cx"> # Move64 instruction. OTOH, our MacroAssemblers already have this notion of &quot;move()&quot; that basically
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3testb3cpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/testb3.cpp (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/testb3.cpp        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/b3/testb3.cpp        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -1885,6 +1885,64 @@
</span><span class="cx">     CHECK(compileAndRun&lt;uint32_t&gt;(proc, a) == (a &gt;&gt; b));
</span><span class="cx"> }
</span><span class="cx"> 
</span><ins>+template&lt;typename IntegerType&gt;
+static unsigned countLeadingZero(IntegerType value)
+{
+    unsigned bitCount = sizeof(IntegerType) * 8;
+    if (!value)
+        return bitCount;
+
+    unsigned counter = 0;
+    while (!(static_cast&lt;uint64_t&gt;(value) &amp; (1l &lt;&lt; (bitCount - 1)))) {
+        value &lt;&lt;= 1;
+        ++counter;
+    }
+    return counter;
+}
+
+void testClzArg64(int64_t a)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    Value* argument = root-&gt;appendNew&lt;ArgumentRegValue&gt;(proc, Origin(), GPRInfo::argumentGPR0);
+    Value* clzValue = root-&gt;appendNew&lt;Value&gt;(proc, Clz, Origin(), argument);
+    root-&gt;appendNew&lt;ControlValue&gt;(proc, Return, Origin(), clzValue);
+    CHECK(compileAndRun&lt;unsigned&gt;(proc, a) == countLeadingZero(a));
+}
+
+void testClzMem64(int64_t a)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    Value* address = root-&gt;appendNew&lt;ArgumentRegValue&gt;(proc, Origin(), GPRInfo::argumentGPR0);
+    MemoryValue* value = root-&gt;appendNew&lt;MemoryValue&gt;(proc, Load, Int64, Origin(), address);
+    Value* clzValue = root-&gt;appendNew&lt;Value&gt;(proc, Clz, Origin(), value);
+    root-&gt;appendNew&lt;ControlValue&gt;(proc, Return, Origin(), clzValue);
+    CHECK(compileAndRun&lt;unsigned&gt;(proc, &amp;a) == countLeadingZero(a));
+}
+
+void testClzArg32(int32_t a)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    Value* argument = root-&gt;appendNew&lt;Value&gt;(proc, Trunc, Origin(),
+        root-&gt;appendNew&lt;ArgumentRegValue&gt;(proc, Origin(), GPRInfo::argumentGPR0));
+    Value* clzValue = root-&gt;appendNew&lt;Value&gt;(proc, Clz, Origin(), argument);
+    root-&gt;appendNew&lt;ControlValue&gt;(proc, Return, Origin(), clzValue);
+    CHECK(compileAndRun&lt;unsigned&gt;(proc, a) == countLeadingZero(a));
+}
+
+void testClzMem32(int32_t a)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    Value* address = root-&gt;appendNew&lt;ArgumentRegValue&gt;(proc, Origin(), GPRInfo::argumentGPR0);
+    MemoryValue* value = root-&gt;appendNew&lt;MemoryValue&gt;(proc, Load, Int32, Origin(), address);
+    Value* clzValue = root-&gt;appendNew&lt;Value&gt;(proc, Clz, Origin(), value);
+    root-&gt;appendNew&lt;ControlValue&gt;(proc, Return, Origin(), clzValue);
+    CHECK(compileAndRun&lt;unsigned&gt;(proc, &amp;a) == countLeadingZero(a));
+}
+
</ins><span class="cx"> void testSqrtArg(double a)
</span><span class="cx"> {
</span><span class="cx">     Procedure proc;
</span><span class="lines">@@ -5317,8 +5375,8 @@
</span><span class="cx">     static const std::array&lt;DoubleOperand, 9&gt; operands = {{
</span><span class="cx">         { &quot;M_PI&quot;, M_PI },
</span><span class="cx">         { &quot;-M_PI&quot;, -M_PI },
</span><del>-        { &quot;1&quot;, 1 },
-        { &quot;-1&quot;, -1 },
</del><ins>+        { &quot;1.&quot;, 1 },
+        { &quot;-1.&quot;, -1 },
</ins><span class="cx">         { &quot;0&quot;, 0 },
</span><span class="cx">         { &quot;negativeZero()&quot;, negativeZero() },
</span><span class="cx">         { &quot;posInfinity()&quot;, posInfinity() },
</span><span class="lines">@@ -5835,6 +5893,11 @@
</span><span class="cx">     RUN(testZShrArgImm32(0xffffffff, 1));
</span><span class="cx">     RUN(testZShrArgImm32(0xffffffff, 63));
</span><span class="cx"> 
</span><ins>+    RUN_UNARY(testClzArg64, int64Operands());
+    RUN_UNARY(testClzMem64, int64Operands());
+    RUN_UNARY(testClzArg32, int32Operands());
+    RUN_UNARY(testClzMem32, int64Operands());
+
</ins><span class="cx">     RUN_UNARY(testSqrtArg, doubleOperands());
</span><span class="cx">     RUN_UNARY(testSqrtImm, doubleOperands());
</span><span class="cx">     RUN_UNARY(testSqrtMem, doubleOperands());
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreftlFTLB3Outputh"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/ftl/FTLB3Output.h (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/ftl/FTLB3Output.h        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/ftl/FTLB3Output.h        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -148,7 +148,7 @@
</span><span class="cx">     LValue bitNot(LValue);
</span><span class="cx"> 
</span><span class="cx">     LValue ceil64(LValue operand) { CRASH(); }
</span><del>-    LValue ctlz32(LValue xOperand, LValue yOperand) { CRASH(); }
</del><ins>+    LValue ctlz32(LValue operand) { return m_block-&gt;appendNew&lt;B3::Value&gt;(m_proc, B3::Clz, origin(), operand); }
</ins><span class="cx">     LValue addWithOverflow32(LValue left, LValue right) { CRASH(); }
</span><span class="cx">     LValue subWithOverflow32(LValue left, LValue right) { CRASH(); }
</span><span class="cx">     LValue mulWithOverflow32(LValue left, LValue right) { CRASH(); }
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreftlFTLLowerDFGToLLVMcpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/ftl/FTLLowerDFGToLLVM.cpp (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/ftl/FTLLowerDFGToLLVM.cpp        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/ftl/FTLLowerDFGToLLVM.cpp        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -1728,8 +1728,7 @@
</span><span class="cx">     void compileArithClz32()
</span><span class="cx">     {
</span><span class="cx">         LValue operand = lowInt32(m_node-&gt;child1());
</span><del>-        LValue isZeroUndef = m_out.booleanFalse;
-        setInt32(m_out.ctlz32(operand, isZeroUndef));
</del><ins>+        setInt32(m_out.ctlz32(operand));
</ins><span class="cx">     }
</span><span class="cx">     
</span><span class="cx">     void compileArithMul()
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreftlFTLOutputh"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/ftl/FTLOutput.h (193124 => 193125)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/ftl/FTLOutput.h        2015-12-03 18:49:02 UTC (rev 193124)
+++ trunk/Source/JavaScriptCore/ftl/FTLOutput.h        2015-12-03 18:49:04 UTC (rev 193125)
</span><span class="lines">@@ -147,9 +147,9 @@
</span><span class="cx">     {
</span><span class="cx">         return call(doubleType, ceil64Intrinsic(), operand);
</span><span class="cx">     }
</span><del>-    LValue ctlz32(LValue xOperand, LValue yOperand)
</del><ins>+    LValue ctlz32(LValue operand)
</ins><span class="cx">     {
</span><del>-        return call(int32, ctlz32Intrinsic(), xOperand, yOperand);
</del><ins>+        return call(int32, ctlz32Intrinsic(), operand, booleanFalse);
</ins><span class="cx">     }
</span><span class="cx">     LValue addWithOverflow32(LValue left, LValue right)
</span><span class="cx">     {
</span></span></pre>
</div>
</div>

</body>
</html>