<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[178173] trunk/Source</title>
</head>
<body>

<style type="text/css"><!--
/*
 * Stylesheet for the commit notification page.
 * Sections: #msg (commit summary), #logmsg / #header / #footer (no element
 * with these ids appears in the markup visible in this file; rules kept
 * unchanged), and #patch (the colourised diff).
 */

/* Commit metadata list (dl.meta inside #msg). */
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
/* Appends a colon after every definition term, e.g. "Revision:". */
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }
#msg dl a { font-weight: bold}
#msg dl a:link    { color:#fc3; }
#msg dl a:active  { color:#ff0; }
#msg dl a:visited { color:#cc6; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }

/* Rules for rich content inside an element with id "logmsg". */
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
/* Negative text-indent plus equal left padding gives a hanging indent for list markers. */
#logmsg ul { text-indent: -1em; padding-left: 1em; }
#logmsg ol { text-indent: -1.5em; padding-left: 1.5em; }
#logmsg > ul, #logmsg > ol { margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
/* U+00BB is the right-pointing double angle quotation mark. */
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }

/* Rules for elements with id "header" / "footer". */
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }

/* Diff section: one bordered box per file, with a coloured h4 title bar. */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
/* span, ins and del are rendered as blocks so each hunk fragment spans the full width. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* Both selectors are scoped to #patch so the grey-on-white styling cannot leak to an unrelated .info element. */
#patch .lines, #patch .info {color:#888;background:#fff;}
--></style>
<div id="msg">
<!-- Commit metadata: revision number (linked to its Trac changeset page), author, and commit date. -->
<dl class="meta">
<dt>Revision</dt> <dd><a href="http://trac.webkit.org/projects/webkit/changeset/178173">178173</a></dd>
<dt>Author</dt> <dd>commit-queue@webkit.org</dd>
<dt>Date</dt> <dd>2015-01-09 09:44:37 -0800 (Fri, 09 Jan 2015)</dd>
</dl>

<!-- Commit log message, preformatted (whitespace is significant); rNNNNNN references link to Trac changeset pages. -->
<h3>Log Message</h3>
<pre>Unreviewed, rolling out <a href="http://trac.webkit.org/projects/webkit/changeset/178154">r178154</a>, <a href="http://trac.webkit.org/projects/webkit/changeset/178163">r178163</a>, and <a href="http://trac.webkit.org/projects/webkit/changeset/178164">r178164</a>.
https://bugs.webkit.org/show_bug.cgi?id=140292

Still multiple assertion failures on tests (Requested by ap on
#webkit).

Reverted changesets:

&quot;Modernize and streamline HTMLTokenizer&quot;
https://bugs.webkit.org/show_bug.cgi?id=140166
http://trac.webkit.org/changeset/178154

&quot;Unreviewed speculative buildfix after <a href="http://trac.webkit.org/projects/webkit/changeset/178154">r178154</a>.&quot;
http://trac.webkit.org/changeset/178163

&quot;One more unreviewed speculative buildfix after <a href="http://trac.webkit.org/projects/webkit/changeset/178154">r178154</a>.&quot;
http://trac.webkit.org/changeset/178164</pre>

<!-- Files modified by this revision. Each href is a fragment that targets the anchor id placed just before that
     file's diff in the patch section; the fragment is the file path with separators and dots removed, so the
     two must stay in sync. (Anchors for the later entries lie beyond the part of the file visible here.) -->
<h3>Modified Paths</h3>
<ul>
<li><a href="#trunkSourceWTFChangeLog">trunk/Source/WTF/ChangeLog</a></li>
<li><a href="#trunkSourceWTFwtfForwardh">trunk/Source/WTF/wtf/Forward.h</a></li>
<li><a href="#trunkSourceWebCoreChangeLog">trunk/Source/WebCore/ChangeLog</a></li>
<li><a href="#trunkSourceWebCorehtmlparserAtomicHTMLTokenh">trunk/Source/WebCore/html/parser/AtomicHTMLToken.h</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLDocumentParsercpp">trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLDocumentParserh">trunk/Source/WebCore/html/parser/HTMLDocumentParser.h</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLEntityParsercpp">trunk/Source/WebCore/html/parser/HTMLEntityParser.cpp</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLInputStreamh">trunk/Source/WebCore/html/parser/HTMLInputStream.h</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLMetaCharsetParsercpp">trunk/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLMetaCharsetParserh">trunk/Source/WebCore/html/parser/HTMLMetaCharsetParser.h</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLPreloadScannercpp">trunk/Source/WebCore/html/parser/HTMLPreloadScanner.cpp</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLPreloadScannerh">trunk/Source/WebCore/html/parser/HTMLPreloadScanner.h</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLResourcePreloadercpp">trunk/Source/WebCore/html/parser/HTMLResourcePreloader.cpp</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLResourcePreloaderh">trunk/Source/WebCore/html/parser/HTMLResourcePreloader.h</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLSourceTrackercpp">trunk/Source/WebCore/html/parser/HTMLSourceTracker.cpp</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLSourceTrackerh">trunk/Source/WebCore/html/parser/HTMLSourceTracker.h</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLTokenh">trunk/Source/WebCore/html/parser/HTMLToken.h</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLTokenizercpp">trunk/Source/WebCore/html/parser/HTMLTokenizer.cpp</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLTokenizerh">trunk/Source/WebCore/html/parser/HTMLTokenizer.h</a></li>
<li><a href="#trunkSourceWebCorehtmlparserHTMLTreeBuildercpp">trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp</a></li>
<li><a href="#trunkSourceWebCorehtmlparserInputStreamPreprocessorh">trunk/Source/WebCore/html/parser/InputStreamPreprocessor.h</a></li>
<li><a href="#trunkSourceWebCorehtmlparserTextDocumentParsercpp">trunk/Source/WebCore/html/parser/TextDocumentParser.cpp</a></li>
<li><a href="#trunkSourceWebCorehtmlparserXSSAuditorcpp">trunk/Source/WebCore/html/parser/XSSAuditor.cpp</a></li>
<li><a href="#trunkSourceWebCorehtmlparserXSSAuditorh">trunk/Source/WebCore/html/parser/XSSAuditor.h</a></li>
<li><a href="#trunkSourceWebCorehtmltrackWebVTTTokenizercpp">trunk/Source/WebCore/html/track/WebVTTTokenizer.cpp</a></li>
<li><a href="#trunkSourceWebCorehtmltrackWebVTTTokenizerh">trunk/Source/WebCore/html/track/WebVTTTokenizer.h</a></li>
<li><a href="#trunkSourceWebCoreplatformtextSegmentedStringcpp">trunk/Source/WebCore/platform/text/SegmentedString.cpp</a></li>
<li><a href="#trunkSourceWebCoreplatformtextSegmentedStringh">trunk/Source/WebCore/platform/text/SegmentedString.h</a></li>
<li><a href="#trunkSourceWebCorexmlparserCharacterReferenceParserInlinesh">trunk/Source/WebCore/xml/parser/CharacterReferenceParserInlines.h</a></li>
<li><a href="#trunkSourceWebCorexmlparserMarkupTokenizerInlinesh">trunk/Source/WebCore/xml/parser/MarkupTokenizerInlines.h</a></li>
</ul>

</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkSourceWTFChangeLog"></a>
<div class="modfile"><h4>Modified: trunk/Source/WTF/ChangeLog (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WTF/ChangeLog        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WTF/ChangeLog        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -1,3 +1,23 @@
</span><ins>+2015-01-09  Commit Queue  &lt;commit-queue@webkit.org&gt;
+
+        Unreviewed, rolling out r178154, r178163, and r178164.
+        https://bugs.webkit.org/show_bug.cgi?id=140292
+
+        Still multiple assertion failures on tests (Requested by ap on
+        #webkit).
+
+        Reverted changesets:
+
+        &quot;Modernize and streamline HTMLTokenizer&quot;
+        https://bugs.webkit.org/show_bug.cgi?id=140166
+        http://trac.webkit.org/changeset/178154
+
+        &quot;Unreviewed speculative buildfix after r178154.&quot;
+        http://trac.webkit.org/changeset/178163
+
+        &quot;One more unreviewed speculative buildfix after r178154.&quot;
+        http://trac.webkit.org/changeset/178164
+
</ins><span class="cx"> 2015-01-08  Darin Adler  &lt;darin@apple.com&gt;
</span><span class="cx"> 
</span><span class="cx">         Modernize and streamline HTMLTokenizer
</span></span></pre></div>
<a id="trunkSourceWTFwtfForwardh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WTF/wtf/Forward.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WTF/wtf/Forward.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WTF/wtf/Forward.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -30,6 +30,7 @@
</span><span class="cx"> template&lt;typename T&gt; class NeverDestroyed;
</span><span class="cx"> template&lt;typename T&gt; class OwnPtr;
</span><span class="cx"> template&lt;typename T&gt; class PassOwnPtr;
</span><ins>+template&lt;typename T&gt; class PassRef;
</ins><span class="cx"> template&lt;typename T&gt; class PassRefPtr;
</span><span class="cx"> template&lt;typename T&gt; class RefPtr;
</span><span class="cx"> template&lt;typename T&gt; class Ref;
</span><span class="lines">@@ -44,13 +45,11 @@
</span><span class="cx"> class Decoder;
</span><span class="cx"> class Encoder;
</span><span class="cx"> class FunctionDispatcher;
</span><del>-class OrdinalNumber;
</del><span class="cx"> class PrintStream;
</span><span class="cx"> class String;
</span><span class="cx"> class StringBuilder;
</span><span class="cx"> class StringImpl;
</span><span class="cx"> class StringView;
</span><del>-class TextPosition;
</del><span class="cx"> 
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="lines">@@ -64,9 +63,9 @@
</span><span class="cx"> using WTF::FunctionDispatcher;
</span><span class="cx"> using WTF::LazyNeverDestroyed;
</span><span class="cx"> using WTF::NeverDestroyed;
</span><del>-using WTF::OrdinalNumber;
</del><span class="cx"> using WTF::OwnPtr;
</span><span class="cx"> using WTF::PassOwnPtr;
</span><ins>+using WTF::PassRef;
</ins><span class="cx"> using WTF::PassRefPtr;
</span><span class="cx"> using WTF::PrintStream;
</span><span class="cx"> using WTF::Ref;
</span><span class="lines">@@ -76,7 +75,6 @@
</span><span class="cx"> using WTF::StringBuilder;
</span><span class="cx"> using WTF::StringImpl;
</span><span class="cx"> using WTF::StringView;
</span><del>-using WTF::TextPosition;
</del><span class="cx"> using WTF::Vector;
</span><span class="cx"> 
</span><span class="cx"> #endif // WTF_Forward_h
</span></span></pre></div>
<a id="trunkSourceWebCoreChangeLog"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/ChangeLog (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/ChangeLog        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/ChangeLog        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -1,3 +1,23 @@
</span><ins>+2015-01-09  Commit Queue  &lt;commit-queue@webkit.org&gt;
+
+        Unreviewed, rolling out r178154, r178163, and r178164.
+        https://bugs.webkit.org/show_bug.cgi?id=140292
+
+        Still multiple assertion failures on tests (Requested by ap on
+        #webkit).
+
+        Reverted changesets:
+
+        &quot;Modernize and streamline HTMLTokenizer&quot;
+        https://bugs.webkit.org/show_bug.cgi?id=140166
+        http://trac.webkit.org/changeset/178154
+
+        &quot;Unreviewed speculative buildfix after r178154.&quot;
+        http://trac.webkit.org/changeset/178163
+
+        &quot;One more unreviewed speculative buildfix after r178154.&quot;
+        http://trac.webkit.org/changeset/178164
+
</ins><span class="cx"> 2015-01-09  Bartlomiej Gajda  &lt;b.gajda@samsung.com&gt;
</span><span class="cx"> 
</span><span class="cx">         [MSE] Implement Append Window support.
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserAtomicHTMLTokenh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/AtomicHTMLToken.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/AtomicHTMLToken.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/AtomicHTMLToken.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -191,6 +191,11 @@
</span><span class="cx">         if (attribute.name.isEmpty())
</span><span class="cx">             continue;
</span><span class="cx"> 
</span><ins>+        ASSERT(attribute.nameRange.start);
+        ASSERT(attribute.nameRange.end);
+        ASSERT(attribute.valueRange.start);
+        ASSERT(attribute.valueRange.end);
+
</ins><span class="cx">         QualifiedName name(nullAtom, AtomicString(attribute.name), nullAtom);
</span><span class="cx"> 
</span><span class="cx">         // FIXME: This is N^2 for the number of attributes.
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLDocumentParsercpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -39,6 +39,28 @@
</span><span class="cx"> 
</span><span class="cx"> using namespace HTMLNames;
</span><span class="cx"> 
</span><ins>+// This is a direct transcription of step 4 from:
+// https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
+static HTMLTokenizer::State tokenizerStateForContextElement(Element&amp; contextElement, bool reportErrors, const HTMLParserOptions&amp; options)
+{
+    const QualifiedName&amp; contextTag = contextElement.tagQName();
+
+    if (contextTag.matches(titleTag) || contextTag.matches(textareaTag))
+        return HTMLTokenizer::RCDATAState;
+    if (contextTag.matches(styleTag)
+        || contextTag.matches(xmpTag)
+        || contextTag.matches(iframeTag)
+        || (contextTag.matches(noembedTag) &amp;&amp; options.pluginsEnabled)
+        || (contextTag.matches(noscriptTag) &amp;&amp; options.scriptEnabled)
+        || contextTag.matches(noframesTag))
+        return reportErrors ? HTMLTokenizer::RAWTEXTState : HTMLTokenizer::PLAINTEXTState;
+    if (contextTag.matches(scriptTag))
+        return reportErrors ? HTMLTokenizer::ScriptDataState : HTMLTokenizer::PLAINTEXTState;
+    if (contextTag.matches(plaintextTag))
+        return HTMLTokenizer::PLAINTEXTState;
+    return HTMLTokenizer::DataState;
+}
+
</ins><span class="cx"> HTMLDocumentParser::HTMLDocumentParser(HTMLDocument&amp; document)
</span><span class="cx">     : ScriptableDocumentParser(document)
</span><span class="cx">     , m_options(document)
</span><span class="lines">@@ -63,9 +85,8 @@
</span><span class="cx">     , m_treeBuilder(std::make_unique&lt;HTMLTreeBuilder&gt;(*this, fragment, contextElement, parserContentPolicy(), m_options))
</span><span class="cx">     , m_xssAuditorDelegate(fragment.document())
</span><span class="cx"> {
</span><del>-    // https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
-    if (contextElement.isHTMLElement())
-        m_tokenizer.updateStateFor(contextElement.tagQName().localName());
</del><ins>+    bool reportErrors = false; // For now document fragment parsing never reports errors.
+    m_tokenizer.setState(tokenizerStateForContextElement(contextElement, reportErrors, m_options));
</ins><span class="cx">     m_xssAuditor.initForFragment();
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="lines">@@ -258,22 +279,22 @@
</span><span class="cx"> 
</span><span class="cx">     while (canTakeNextToken(mode, session) &amp;&amp; !session.needsYield) {
</span><span class="cx">         if (!isParsingFragment())
</span><del>-            m_sourceTracker.startToken(m_input.current(), m_tokenizer);
</del><ins>+            m_sourceTracker.start(m_input.current(), &amp;m_tokenizer, m_token);
</ins><span class="cx"> 
</span><del>-        auto token = m_tokenizer.nextToken(m_input.current());
-        if (!token)
</del><ins>+        if (!m_tokenizer.nextToken(m_input.current(), m_token))
</ins><span class="cx">             break;
</span><span class="cx"> 
</span><span class="cx">         if (!isParsingFragment()) {
</span><del>-            m_sourceTracker.endToken(m_input.current(), m_tokenizer);
</del><ins>+            m_sourceTracker.end(m_input.current(), &amp;m_tokenizer, m_token);
</ins><span class="cx"> 
</span><span class="cx">             // We do not XSS filter innerHTML, which means we (intentionally) fail
</span><span class="cx">             // http/tests/security/xssAuditor/dom-write-innerHTML.html
</span><del>-            if (auto xssInfo = m_xssAuditor.filterToken(FilterTokenRequest(*token, m_sourceTracker, m_tokenizer.shouldAllowCDATA())))
</del><ins>+            if (auto xssInfo = m_xssAuditor.filterToken(FilterTokenRequest(m_token, m_sourceTracker, m_tokenizer.shouldAllowCDATA())))
</ins><span class="cx">                 m_xssAuditorDelegate.didBlockScript(*xssInfo);
</span><span class="cx">         }
</span><span class="cx"> 
</span><del>-        constructTreeFromHTMLToken(token);
</del><ins>+        constructTreeFromHTMLToken(m_token);
+        ASSERT(m_token.type() == HTMLToken::Uninitialized);
</ins><span class="cx">     }
</span><span class="cx"> 
</span><span class="cx">     // Ensure we haven't been totally deref'ed after pumping. Any caller of this
</span><span class="lines">@@ -287,20 +308,20 @@
</span><span class="cx">         m_parserScheduler-&gt;scheduleForResume();
</span><span class="cx"> 
</span><span class="cx">     if (isWaitingForScripts()) {
</span><del>-        ASSERT(m_tokenizer.isInDataState());
</del><ins>+        ASSERT(m_tokenizer.state() == HTMLTokenizer::DataState);
</ins><span class="cx">         if (!m_preloadScanner) {
</span><span class="cx">             m_preloadScanner = std::make_unique&lt;HTMLPreloadScanner&gt;(m_options, document()-&gt;url(), document()-&gt;deviceScaleFactor());
</span><span class="cx">             m_preloadScanner-&gt;appendToEnd(m_input.current());
</span><span class="cx">         }
</span><del>-        m_preloadScanner-&gt;scan(*m_preloader, *document());
</del><ins>+        m_preloadScanner-&gt;scan(m_preloader.get(), *document());
</ins><span class="cx">     }
</span><span class="cx"> 
</span><span class="cx">     InspectorInstrumentation::didWriteHTML(cookie, m_input.current().currentLine().zeroBasedInt());
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-void HTMLDocumentParser::constructTreeFromHTMLToken(HTMLTokenizer::TokenPtr&amp; rawToken)
</del><ins>+void HTMLDocumentParser::constructTreeFromHTMLToken(HTMLToken&amp; rawToken)
</ins><span class="cx"> {
</span><del>-    AtomicHTMLToken token(*rawToken);
</del><ins>+    AtomicHTMLToken token(rawToken);
</ins><span class="cx"> 
</span><span class="cx">     // We clear the rawToken in case constructTreeFromAtomicToken
</span><span class="cx">     // synchronously re-enters the parser. We don't clear the token immedately
</span><span class="lines">@@ -312,13 +333,15 @@
</span><span class="cx">     // FIXME: Stop clearing the rawToken once we start running the parser off
</span><span class="cx">     // the main thread or once we stop allowing synchronous JavaScript
</span><span class="cx">     // execution from parseAttribute.
</span><del>-    if (rawToken-&gt;type() != HTMLToken::Character) {
-        // Clearing the TokenPtr makes sure we don't clear the HTMLToken a second time
-        // later when the TokenPtr is destroyed.
</del><ins>+    if (rawToken.type() != HTMLToken::Character)
</ins><span class="cx">         rawToken.clear();
</span><del>-    }
</del><span class="cx"> 
</span><span class="cx">     m_treeBuilder-&gt;constructTree(token);
</span><ins>+
+    if (rawToken.type() != HTMLToken::Uninitialized) {
+        ASSERT(rawToken.type() == HTMLToken::Character);
+        rawToken.clear();
+    }
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> bool HTMLDocumentParser::hasInsertionPoint()
</span><span class="lines">@@ -350,7 +373,7 @@
</span><span class="cx">         if (!m_insertionPreloadScanner)
</span><span class="cx">             m_insertionPreloadScanner = std::make_unique&lt;HTMLPreloadScanner&gt;(m_options, document()-&gt;url(), document()-&gt;deviceScaleFactor());
</span><span class="cx">         m_insertionPreloadScanner-&gt;appendToEnd(source);
</span><del>-        m_insertionPreloadScanner-&gt;scan(*m_preloader, *document());
</del><ins>+        m_insertionPreloadScanner-&gt;scan(m_preloader.get(), *document());
</ins><span class="cx">     }
</span><span class="cx"> 
</span><span class="cx">     endIfDelayed();
</span><span class="lines">@@ -375,7 +398,7 @@
</span><span class="cx">         } else {
</span><span class="cx">             m_preloadScanner-&gt;appendToEnd(source);
</span><span class="cx">             if (isWaitingForScripts())
</span><del>-                m_preloadScanner-&gt;scan(*m_preloader, *document());
</del><ins>+                m_preloadScanner-&gt;scan(m_preloader.get(), *document());
</ins><span class="cx">         }
</span><span class="cx">     }
</span><span class="cx"> 
</span><span class="lines">@@ -510,7 +533,7 @@
</span><span class="cx"> {
</span><span class="cx">     ASSERT(m_preloadScanner);
</span><span class="cx">     m_preloadScanner-&gt;appendToEnd(m_input.current());
</span><del>-    m_preloadScanner-&gt;scan(*m_preloader, *document());
</del><ins>+    m_preloadScanner-&gt;scan(m_preloader.get(), *document());
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> void HTMLDocumentParser::notifyFinished(CachedResource* cachedResource)
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLDocumentParserh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLDocumentParser.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLDocumentParser.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLDocumentParser.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -103,7 +103,7 @@
</span><span class="cx">     bool canTakeNextToken(SynchronousMode, PumpSession&amp;);
</span><span class="cx">     void pumpTokenizer(SynchronousMode);
</span><span class="cx">     void pumpTokenizerIfPossible(SynchronousMode);
</span><del>-    void constructTreeFromHTMLToken(HTMLTokenizer::TokenPtr&amp;);
</del><ins>+    void constructTreeFromHTMLToken(HTMLToken&amp;);
</ins><span class="cx"> 
</span><span class="cx">     void runScriptsForPausedTreeBuilder();
</span><span class="cx">     void resumeParsingAfterScriptExecution();
</span><span class="lines">@@ -121,6 +121,7 @@
</span><span class="cx">     HTMLParserOptions m_options;
</span><span class="cx">     HTMLInputStream m_input;
</span><span class="cx"> 
</span><ins>+    HTMLToken m_token;
</ins><span class="cx">     HTMLTokenizer m_tokenizer;
</span><span class="cx">     std::unique_ptr&lt;HTMLScriptRunner&gt; m_scriptRunner;
</span><span class="cx">     std::unique_ptr&lt;HTMLTreeBuilder&gt; m_treeBuilder;
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLEntityParsercpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLEntityParser.cpp (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLEntityParser.cpp        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLEntityParser.cpp        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -60,9 +60,9 @@
</span><span class="cx">         return windowsLatin1ExtensionArray[value - 0x80];
</span><span class="cx">     }
</span><span class="cx"> 
</span><del>-    static bool acceptMalformed() { return true; }
</del><ins>+    inline static bool acceptMalformed() { return true; }
</ins><span class="cx"> 
</span><del>-    static bool consumeNamedEntity(SegmentedString&amp; source, StringBuilder&amp; decodedEntity, bool&amp; notEnoughCharacters, UChar additionalAllowedCharacter, UChar&amp; cc)
</del><ins>+    inline static bool consumeNamedEntity(SegmentedString&amp; source, StringBuilder&amp; decodedEntity, bool&amp; notEnoughCharacters, UChar additionalAllowedCharacter, UChar&amp; cc)
</ins><span class="cx">     {
</span><span class="cx">         StringBuilder consumedCharacters;
</span><span class="cx">         HTMLEntitySearch entitySearch;
</span><span class="lines">@@ -72,7 +72,7 @@
</span><span class="cx">             if (!entitySearch.isEntityPrefix())
</span><span class="cx">                 break;
</span><span class="cx">             consumedCharacters.append(cc);
</span><del>-            source.advance();
</del><ins>+            source.advanceAndASSERT(cc);
</ins><span class="cx">         }
</span><span class="cx">         notEnoughCharacters = source.isEmpty();
</span><span class="cx">         if (notEnoughCharacters) {
</span><span class="lines">@@ -97,7 +97,7 @@
</span><span class="cx">                 cc = source.currentChar();
</span><span class="cx">                 ASSERT_UNUSED(reference, cc == *reference++);
</span><span class="cx">                 consumedCharacters.append(cc);
</span><del>-                source.advance();
</del><ins>+                source.advanceAndASSERT(cc);
</ins><span class="cx">                 ASSERT(!source.isEmpty());
</span><span class="cx">             }
</span><span class="cx">             cc = source.currentChar();
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLInputStreamh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLInputStream.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLInputStream.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLInputStream.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -28,7 +28,6 @@
</span><span class="cx"> 
</span><span class="cx"> #include &quot;InputStreamPreprocessor.h&quot;
</span><span class="cx"> #include &quot;SegmentedString.h&quot;
</span><del>-#include &lt;wtf/text/TextPosition.h&gt;
</del><span class="cx"> 
</span><span class="cx"> namespace WebCore {
</span><span class="cx"> 
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLMetaCharsetParsercpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -1,6 +1,5 @@
</span><span class="cx"> /*
</span><span class="cx">  * Copyright (C) 2010 Google Inc. All Rights Reserved.
</span><del>- * Copyright (C) 2015 Apple Inc. All Rights Reserved.
</del><span class="cx">  *
</span><span class="cx">  * Redistribution and use in source and binary forms, with or without
</span><span class="cx">  * modification, are permitted provided that the following conditions
</span><span class="lines">@@ -29,26 +28,41 @@
</span><span class="cx"> 
</span><span class="cx"> #include &quot;HTMLNames.h&quot;
</span><span class="cx"> #include &quot;HTMLParserIdioms.h&quot;
</span><ins>+#include &quot;HTMLTokenizer.h&quot;
+#include &quot;TextCodec.h&quot;
</ins><span class="cx"> #include &quot;TextEncodingRegistry.h&quot;
</span><span class="cx"> 
</span><ins>+using namespace WTF;
+
</ins><span class="cx"> namespace WebCore {
</span><span class="cx"> 
</span><span class="cx"> using namespace HTMLNames;
</span><span class="cx"> 
</span><span class="cx"> HTMLMetaCharsetParser::HTMLMetaCharsetParser()
</span><del>-    : m_codec(newTextCodec(Latin1Encoding()))
</del><ins>+    : m_tokenizer(std::make_unique&lt;HTMLTokenizer&gt;(HTMLParserOptions()))
+    , m_assumedCodec(newTextCodec(Latin1Encoding()))
+    , m_inHeadSection(true)
+    , m_doneChecking(false)
</ins><span class="cx"> {
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-static StringView extractCharset(const String&amp; value)
</del><ins>+HTMLMetaCharsetParser::~HTMLMetaCharsetParser()
</ins><span class="cx"> {
</span><ins>+}
+
+static const char charsetString[] = &quot;charset&quot;;
+static const size_t charsetLength = sizeof(&quot;charset&quot;) - 1;
+
+String HTMLMetaCharsetParser::extractCharset(const String&amp; value)
+{
+    size_t pos = 0;
</ins><span class="cx">     unsigned length = value.length();
</span><del>-    for (size_t pos = 0; pos &lt; length; ) {
-        pos = value.find(&quot;charset&quot;, pos, false);
</del><ins>+
+    while (pos &lt; length) {
+        pos = value.find(charsetString, pos, false);
</ins><span class="cx">         if (pos == notFound)
</span><span class="cx">             break;
</span><span class="cx"> 
</span><del>-        static const size_t charsetLength = sizeof(&quot;charset&quot;) - 1;
</del><span class="cx">         pos += charsetLength;
</span><span class="cx"> 
</span><span class="cx">         // Skip whitespace.
</span><span class="lines">@@ -63,10 +77,12 @@
</span><span class="cx">         while (pos &lt; length &amp;&amp; value[pos] &lt;= ' ')
</span><span class="cx">             ++pos;
</span><span class="cx"> 
</span><del>-        UChar quoteMark = 0;
-        if (pos &lt; length &amp;&amp; (value[pos] == '&quot;' || value[pos] == '\''))
-            quoteMark = value[pos++];
-
</del><ins>+        char quoteMark = 0;
+        if (pos &lt; length &amp;&amp; (value[pos] == '&quot;' || value[pos] == '\'')) {
+            quoteMark = static_cast&lt;char&gt;(value[pos++]);
+            ASSERT(!(quoteMark &amp; 0x80));
+        }
+            
</ins><span class="cx">         if (pos == length)
</span><span class="cx">             break;
</span><span class="cx"> 
</span><span class="lines">@@ -77,17 +93,19 @@
</span><span class="cx">         if (quoteMark &amp;&amp; (end == length))
</span><span class="cx">             break; // Close quote not found.
</span><span class="cx"> 
</span><del>-        return StringView(value).substring(pos, end - pos);
</del><ins>+        return value.substring(pos, end - pos);
</ins><span class="cx">     }
</span><del>-    return StringView();
</del><ins>+
+    return &quot;&quot;;
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-bool HTMLMetaCharsetParser::processMeta(HTMLToken&amp; token)
</del><ins>+bool HTMLMetaCharsetParser::processMeta()
</ins><span class="cx"> {
</span><ins>+    const HTMLToken::AttributeList&amp; tokenAttributes = m_token.attributes();
</ins><span class="cx">     AttributeList attributes;
</span><del>-    for (auto&amp; attribute : token.attributes()) {
-        String attributeName = StringImpl::create8BitIfPossible(attribute.name);
-        String attributeValue = StringImpl::create8BitIfPossible(attribute.value);
</del><ins>+    for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin(); iter != tokenAttributes.end(); ++iter) {
+        String attributeName = StringImpl::create8BitIfPossible(iter-&gt;name);
+        String attributeValue = StringImpl::create8BitIfPossible(iter-&gt;value);
</ins><span class="cx">         attributes.append(std::make_pair(attributeName, attributeValue));
</span><span class="cx">     }
</span><span class="cx"> 
</span><span class="lines">@@ -98,12 +116,12 @@
</span><span class="cx"> TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const AttributeList&amp; attributes)
</span><span class="cx"> {
</span><span class="cx">     bool gotPragma = false;
</span><del>-    enum { None, Charset, Pragma } mode = None;
-    StringView charset;
</del><ins>+    Mode mode = None;
+    String charset;
</ins><span class="cx"> 
</span><del>-    for (auto&amp; attribute : attributes) {
-        const String&amp; attributeName = attribute.first;
-        const String&amp; attributeValue = attribute.second;
</del><ins>+    for (AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
+        const AtomicString&amp; attributeName = iter-&gt;first;
+        const String&amp; attributeValue = iter-&gt;second;
</ins><span class="cx"> 
</span><span class="cx">         if (attributeName == http_equivAttr) {
</span><span class="cx">             if (equalIgnoringCase(attributeValue, &quot;content-type&quot;))
</span><span class="lines">@@ -121,11 +139,13 @@
</span><span class="cx">     }
</span><span class="cx"> 
</span><span class="cx">     if (mode == Charset || (mode == Pragma &amp;&amp; gotPragma))
</span><del>-        return TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset.toStringWithoutCopying()));
</del><ins>+        return TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset));
</ins><span class="cx"> 
</span><span class="cx">     return TextEncoding();
</span><span class="cx"> }
</span><span class="cx"> 
</span><ins>+static const int bytesToCheckUnconditionally = 1024; // That many input bytes will be checked for meta charset even if &lt;head&gt; section is over.
+
</ins><span class="cx"> bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)
</span><span class="cx"> {
</span><span class="cx">     if (m_doneChecking)
</span><span class="lines">@@ -136,32 +156,30 @@
</span><span class="cx">     // We still don't have an encoding, and are in the head.
</span><span class="cx">     // The following tags are allowed in &lt;head&gt;:
</span><span class="cx">     // SCRIPT|STYLE|META|LINK|OBJECT|TITLE|BASE
</span><del>-    //
</del><ins>+
</ins><span class="cx">     // We stop scanning when a tag that is not permitted in &lt;head&gt;
</span><span class="cx">     // is seen, rather when &lt;/head&gt; is seen, because that more closely
</span><span class="cx">     // matches behavior in other browsers; more details in
</span><span class="cx">     // &lt;http://bugs.webkit.org/show_bug.cgi?id=3590&gt;.
</span><del>-    //
</del><ins>+
</ins><span class="cx">     // Additionally, we ignore things that looks like tags in &lt;title&gt;, &lt;script&gt;
</span><span class="cx">     // and &lt;noscript&gt;; see &lt;http://bugs.webkit.org/show_bug.cgi?id=4560&gt;,
</span><span class="cx">     // &lt;http://bugs.webkit.org/show_bug.cgi?id=12165&gt; and
</span><span class="cx">     // &lt;http://bugs.webkit.org/show_bug.cgi?id=12389&gt;.
</span><del>-    //
</del><ins>+
</ins><span class="cx">     // Since many sites have charset declarations after &lt;body&gt; or other tags
</span><span class="cx">     // that are disallowed in &lt;head&gt;, we don't bail out until we've checked at
</span><span class="cx">     // least bytesToCheckUnconditionally bytes of input.
</span><span class="cx"> 
</span><del>-    static const int bytesToCheckUnconditionally = 1024;
</del><ins>+    m_input.append(SegmentedString(m_assumedCodec-&gt;decode(data, length)));
</ins><span class="cx"> 
</span><del>-    m_input.append(SegmentedString(m_codec-&gt;decode(data, length)));
-
-    while (auto token = m_tokenizer.nextToken(m_input)) {
-        bool isEnd = token-&gt;type() == HTMLToken::EndTag;
-        if (isEnd || token-&gt;type() == HTMLToken::StartTag) {
-            AtomicString tagName(token-&gt;name());
-            if (!isEnd) {
-                m_tokenizer.updateStateFor(tagName);
-                if (tagName == metaTag &amp;&amp; processMeta(*token)) {
</del><ins>+    while (m_tokenizer-&gt;nextToken(m_input, m_token)) {
+        bool end = m_token.type() == HTMLToken::EndTag;
+        if (end || m_token.type() == HTMLToken::StartTag) {
+            AtomicString tagName(m_token.name());
+            if (!end) {
+                m_tokenizer-&gt;updateStateFor(tagName);
+                if (tagName == metaTag &amp;&amp; processMeta()) {
</ins><span class="cx">                     m_doneChecking = true;
</span><span class="cx">                     return true;
</span><span class="cx">                 }
</span><span class="lines">@@ -171,8 +189,7 @@
</span><span class="cx">                 &amp;&amp; tagName != styleTag &amp;&amp; tagName != linkTag
</span><span class="cx">                 &amp;&amp; tagName != metaTag &amp;&amp; tagName != objectTag
</span><span class="cx">                 &amp;&amp; tagName != titleTag &amp;&amp; tagName != baseTag
</span><del>-                &amp;&amp; (isEnd || tagName != htmlTag)
-                &amp;&amp; (isEnd || tagName != headTag)) {
</del><ins>+                &amp;&amp; (end || tagName != htmlTag) &amp;&amp; (end || tagName != headTag)) {
</ins><span class="cx">                 m_inHeadSection = false;
</span><span class="cx">             }
</span><span class="cx">         }
</span><span class="lines">@@ -181,6 +198,8 @@
</span><span class="cx">             m_doneChecking = true;
</span><span class="cx">             return true;
</span><span class="cx">         }
</span><ins>+
+        m_token.clear();
</ins><span class="cx">     }
</span><span class="cx"> 
</span><span class="cx">     return false;
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLMetaCharsetParserh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLMetaCharsetParser.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLMetaCharsetParser.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLMetaCharsetParser.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -26,36 +26,49 @@
</span><span class="cx"> #ifndef HTMLMetaCharsetParser_h
</span><span class="cx"> #define HTMLMetaCharsetParser_h
</span><span class="cx"> 
</span><del>-#include &quot;HTMLTokenizer.h&quot;
</del><ins>+#include &quot;HTMLToken.h&quot;
</ins><span class="cx"> #include &quot;SegmentedString.h&quot;
</span><span class="cx"> #include &quot;TextEncoding.h&quot;
</span><ins>+#include &lt;wtf/Noncopyable.h&gt;
</ins><span class="cx"> 
</span><span class="cx"> namespace WebCore {
</span><span class="cx"> 
</span><ins>+class HTMLTokenizer;
</ins><span class="cx"> class TextCodec;
</span><span class="cx"> 
</span><span class="cx"> class HTMLMetaCharsetParser {
</span><span class="cx">     WTF_MAKE_NONCOPYABLE(HTMLMetaCharsetParser); WTF_MAKE_FAST_ALLOCATED;
</span><span class="cx"> public:
</span><span class="cx">     HTMLMetaCharsetParser();
</span><ins>+    ~HTMLMetaCharsetParser();
</ins><span class="cx"> 
</span><span class="cx">     // Returns true if done checking, regardless whether an encoding is found.
</span><span class="cx">     bool checkForMetaCharset(const char*, size_t);
</span><span class="cx"> 
</span><span class="cx">     const TextEncoding&amp; encoding() { return m_encoding; }
</span><span class="cx"> 
</span><ins>+    typedef Vector&lt;std::pair&lt;String, String&gt;&gt; AttributeList;
</ins><span class="cx">     // The returned encoding might not be valid.
</span><del>-    typedef Vector&lt;std::pair&lt;String, String&gt;&gt; AttributeList;
-    static TextEncoding encodingFromMetaAttributes(const AttributeList&amp;);
</del><ins>+    static TextEncoding encodingFromMetaAttributes(const AttributeList&amp;
+);
</ins><span class="cx"> 
</span><span class="cx"> private:
</span><del>-    bool processMeta(HTMLToken&amp;);
</del><ins>+    bool processMeta();
+    static String extractCharset(const String&amp;);
</ins><span class="cx"> 
</span><del>-    HTMLTokenizer m_tokenizer;
-    const std::unique_ptr&lt;TextCodec&gt; m_codec;
</del><ins>+    enum Mode {
+        None,
+        Charset,
+        Pragma,
+    };
+
+    std::unique_ptr&lt;HTMLTokenizer&gt; m_tokenizer;
+    std::unique_ptr&lt;TextCodec&gt; m_assumedCodec;
</ins><span class="cx">     SegmentedString m_input;
</span><del>-    bool m_inHeadSection { true };
-    bool m_doneChecking { false };
</del><ins>+    HTMLToken m_token;
+    bool m_inHeadSection;
+
+    bool m_doneChecking;
</ins><span class="cx">     TextEncoding m_encoding;
</span><span class="cx"> };
</span><span class="cx"> 
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLPreloadScannercpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLPreloadScanner.cpp (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLPreloadScanner.cpp        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLPreloadScanner.cpp        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -242,10 +242,42 @@
</span><span class="cx"> 
</span><span class="cx"> TokenPreloadScanner::TokenPreloadScanner(const URL&amp; documentURL, float deviceScaleFactor)
</span><span class="cx">     : m_documentURL(documentURL)
</span><ins>+    , m_inStyle(false)
</ins><span class="cx">     , m_deviceScaleFactor(deviceScaleFactor)
</span><ins>+#if ENABLE(TEMPLATE_ELEMENT)
+    , m_templateCount(0)
+#endif
</ins><span class="cx"> {
</span><span class="cx"> }
</span><span class="cx"> 
</span><ins>+TokenPreloadScanner::~TokenPreloadScanner()
+{
+}
+
+TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint()
+{
+    TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size();
+    m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle
+#if ENABLE(TEMPLATE_ELEMENT)
+                                    , m_templateCount
+#endif
+                                    ));
+    return checkpoint;
+}
+
+void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex)
+{
+    ASSERT(checkpointIndex &lt; m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid.
+    const Checkpoint&amp; checkpoint = m_checkpoints[checkpointIndex];
+    m_predictedBaseElementURL = checkpoint.predictedBaseElementURL;
+    m_inStyle = checkpoint.inStyle;
+#if ENABLE(TEMPLATE_ELEMENT)
+    m_templateCount = checkpoint.templateCount;
+#endif
+    m_cssScanner.reset();
+    m_checkpoints.clear();
+}
+
</ins><span class="cx"> void TokenPreloadScanner::scan(const HTMLToken&amp; token, Vector&lt;std::unique_ptr&lt;PreloadRequest&gt;&gt;&amp; requests, Document&amp; document)
</span><span class="cx"> {
</span><span class="cx">     switch (token.type()) {
</span><span class="lines">@@ -317,16 +349,20 @@
</span><span class="cx"> 
</span><span class="cx"> HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions&amp; options, const URL&amp; documentURL, float deviceScaleFactor)
</span><span class="cx">     : m_scanner(documentURL, deviceScaleFactor)
</span><del>-    , m_tokenizer(options)
</del><ins>+    , m_tokenizer(std::make_unique&lt;HTMLTokenizer&gt;(options))
</ins><span class="cx"> {
</span><span class="cx"> }
</span><span class="cx"> 
</span><ins>+HTMLPreloadScanner::~HTMLPreloadScanner()
+{
+}
+
</ins><span class="cx"> void HTMLPreloadScanner::appendToEnd(const SegmentedString&amp; source)
</span><span class="cx"> {
</span><span class="cx">     m_source.append(source);
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-void HTMLPreloadScanner::scan(HTMLResourcePreloader&amp; preloader, Document&amp; document)
</del><ins>+void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, Document&amp; document)
</ins><span class="cx"> {
</span><span class="cx">     ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread.
</span><span class="cx"> 
</span><span class="lines">@@ -338,13 +374,14 @@
</span><span class="cx"> 
</span><span class="cx">     PreloadRequestStream requests;
</span><span class="cx"> 
</span><del>-    while (auto token = m_tokenizer.nextToken(m_source)) {
-        if (token-&gt;type() == HTMLToken::StartTag)
-            m_tokenizer.updateStateFor(AtomicString(token-&gt;name()));
-        m_scanner.scan(*token, requests, document);
</del><ins>+    while (m_tokenizer-&gt;nextToken(m_source, m_token)) {
+        if (m_token.type() == HTMLToken::StartTag)
+            m_tokenizer-&gt;updateStateFor(AtomicString(m_token.name()));
+        m_scanner.scan(m_token, requests, document);
+        m_token.clear();
</ins><span class="cx">     }
</span><span class="cx"> 
</span><del>-    preloader.preload(WTF::move(requests));
</del><ins>+    preloader-&gt;preload(WTF::move(requests));
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> }
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLPreloadScannerh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLPreloadScanner.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLPreloadScanner.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLPreloadScanner.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -28,20 +28,40 @@
</span><span class="cx"> #define HTMLPreloadScanner_h
</span><span class="cx"> 
</span><span class="cx"> #include &quot;CSSPreloadScanner.h&quot;
</span><del>-#include &quot;HTMLTokenizer.h&quot;
</del><ins>+#include &quot;HTMLToken.h&quot;
</ins><span class="cx"> #include &quot;SegmentedString.h&quot;
</span><ins>+#include &lt;wtf/Vector.h&gt;
</ins><span class="cx"> 
</span><span class="cx"> namespace WebCore {
</span><span class="cx"> 
</span><ins>+typedef size_t TokenPreloadScannerCheckpoint;
+
+class HTMLParserOptions;
+class HTMLTokenizer;
+class SegmentedString;
+class Frame;
+
</ins><span class="cx"> class TokenPreloadScanner {
</span><del>-    WTF_MAKE_NONCOPYABLE(TokenPreloadScanner);
</del><ins>+    WTF_MAKE_NONCOPYABLE(TokenPreloadScanner); WTF_MAKE_FAST_ALLOCATED;
</ins><span class="cx"> public:
</span><span class="cx">     explicit TokenPreloadScanner(const URL&amp; documentURL, float deviceScaleFactor = 1.0);
</span><ins>+    ~TokenPreloadScanner();
</ins><span class="cx"> 
</span><del>-    void scan(const HTMLToken&amp;, PreloadRequestStream&amp;, Document&amp;);
</del><ins>+    void scan(const HTMLToken&amp;, PreloadRequestStream&amp; requests, Document&amp;);
</ins><span class="cx"> 
</span><span class="cx">     void setPredictedBaseElementURL(const URL&amp; url) { m_predictedBaseElementURL = url; }
</span><span class="cx"> 
</span><ins>+    // A TokenPreloadScannerCheckpoint is valid until the next call to rewindTo,
+    // at which point all outstanding checkpoints are invalidated.
+    TokenPreloadScannerCheckpoint createCheckpoint();
+    void rewindTo(TokenPreloadScannerCheckpoint);
+
+    bool isSafeToSendToAnotherThread()
+    {
+        return m_documentURL.isSafeToSendToAnotherThread()
+            &amp;&amp; m_predictedBaseElementURL.isSafeToSendToAnotherThread();
+    }
+
</ins><span class="cx"> private:
</span><span class="cx">     enum class TagId {
</span><span class="cx">         // These tags are scanned by the StartTagScanner.
</span><span class="lines">@@ -65,29 +85,54 @@
</span><span class="cx"> 
</span><span class="cx">     void updatePredictedBaseURL(const HTMLToken&amp;);
</span><span class="cx"> 
</span><ins>+    struct Checkpoint {
+        Checkpoint(const URL&amp; predictedBaseElementURL, bool inStyle
+#if ENABLE(TEMPLATE_ELEMENT)
+            , size_t templateCount
+#endif
+            )
+            : predictedBaseElementURL(predictedBaseElementURL)
+            , inStyle(inStyle)
+#if ENABLE(TEMPLATE_ELEMENT)
+            , templateCount(templateCount)
+#endif
+        {
+        }
+
+        URL predictedBaseElementURL;
+        bool inStyle;
+#if ENABLE(TEMPLATE_ELEMENT)
+        size_t templateCount;
+#endif
+    };
+
</ins><span class="cx">     CSSPreloadScanner m_cssScanner;
</span><span class="cx">     const URL m_documentURL;
</span><del>-    const float m_deviceScaleFactor { 1 };
</del><ins>+    URL m_predictedBaseElementURL;
+    bool m_inStyle;
+    float m_deviceScaleFactor;
</ins><span class="cx"> 
</span><del>-    URL m_predictedBaseElementURL;
-    bool m_inStyle { false };
</del><span class="cx"> #if ENABLE(TEMPLATE_ELEMENT)
</span><del>-    unsigned m_templateCount { 0 };
</del><ins>+    size_t m_templateCount;
</ins><span class="cx"> #endif
</span><ins>+
+    Vector&lt;Checkpoint&gt; m_checkpoints;
</ins><span class="cx"> };
</span><span class="cx"> 
</span><span class="cx"> class HTMLPreloadScanner {
</span><del>-    WTF_MAKE_FAST_ALLOCATED;
</del><ins>+    WTF_MAKE_NONCOPYABLE(HTMLPreloadScanner); WTF_MAKE_FAST_ALLOCATED;
</ins><span class="cx"> public:
</span><span class="cx">     HTMLPreloadScanner(const HTMLParserOptions&amp;, const URL&amp; documentURL, float deviceScaleFactor = 1.0);
</span><ins>+    ~HTMLPreloadScanner();
</ins><span class="cx"> 
</span><span class="cx">     void appendToEnd(const SegmentedString&amp;);
</span><del>-    void scan(HTMLResourcePreloader&amp;, Document&amp;);
</del><ins>+    void scan(HTMLResourcePreloader*, Document&amp;);
</ins><span class="cx"> 
</span><span class="cx"> private:
</span><span class="cx">     TokenPreloadScanner m_scanner;
</span><span class="cx">     SegmentedString m_source;
</span><del>-    HTMLTokenizer m_tokenizer;
</del><ins>+    HTMLToken m_token;
+    std::unique_ptr&lt;HTMLTokenizer&gt; m_tokenizer;
</ins><span class="cx"> };
</span><span class="cx"> 
</span><span class="cx"> }
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLResourcePreloadercpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLResourcePreloader.cpp (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLResourcePreloader.cpp        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLResourcePreloader.cpp        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -35,6 +35,15 @@
</span><span class="cx"> 
</span><span class="cx"> namespace WebCore {
</span><span class="cx"> 
</span><ins>+bool PreloadRequest::isSafeToSendToAnotherThread() const
+{
+    return m_initiator.isSafeToSendToAnotherThread()
+        &amp;&amp; m_charset.isSafeToSendToAnotherThread()
+        &amp;&amp; m_resourceURL.isSafeToSendToAnotherThread()
+        &amp;&amp; m_mediaAttribute.isSafeToSendToAnotherThread()
+        &amp;&amp; m_baseURL.isSafeToSendToAnotherThread();
+}
+
</ins><span class="cx"> URL PreloadRequest::completeURL(Document&amp; document)
</span><span class="cx"> {
</span><span class="cx">     return document.completeURL(m_resourceURL, m_baseURL.isEmpty() ? document.url() : m_baseURL);
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLResourcePreloaderh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLResourcePreloader.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLResourcePreloader.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLResourcePreloader.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -35,14 +35,16 @@
</span><span class="cx"> public:
</span><span class="cx">     PreloadRequest(const String&amp; initiator, const String&amp; resourceURL, const URL&amp; baseURL, CachedResource::Type resourceType, const String&amp; mediaAttribute)
</span><span class="cx">         : m_initiator(initiator)
</span><del>-        , m_resourceURL(resourceURL)
</del><ins>+        , m_resourceURL(resourceURL.isolatedCopy())
</ins><span class="cx">         , m_baseURL(baseURL.copy())
</span><span class="cx">         , m_resourceType(resourceType)
</span><del>-        , m_mediaAttribute(mediaAttribute)
</del><ins>+        , m_mediaAttribute(mediaAttribute.isolatedCopy())
</ins><span class="cx">         , m_crossOriginModeAllowsCookies(false)
</span><span class="cx">     {
</span><span class="cx">     }
</span><span class="cx"> 
</span><ins>+    bool isSafeToSendToAnotherThread() const;
+
</ins><span class="cx">     CachedResourceRequest resourceRequest(Document&amp;);
</span><span class="cx"> 
</span><span class="cx">     const String&amp; charset() const { return m_charset; }
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLSourceTrackercpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLSourceTracker.cpp (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLSourceTracker.cpp        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLSourceTracker.cpp        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -1,6 +1,5 @@
</span><span class="cx"> /*
</span><span class="cx">  * Copyright (C) 2010 Adam Barth. All Rights Reserved.
</span><del>- * Copyright (C) 2015 Apple Inc. All rights reserved.
</del><span class="cx">  *
</span><span class="cx">  * Redistribution and use in source and binary forms, with or without
</span><span class="cx">  * modification, are permitted provided that the following conditions
</span><span class="lines">@@ -26,7 +25,6 @@
</span><span class="cx"> 
</span><span class="cx"> #include &quot;config.h&quot;
</span><span class="cx"> #include &quot;HTMLSourceTracker.h&quot;
</span><del>-
</del><span class="cx"> #include &quot;HTMLTokenizer.h&quot;
</span><span class="cx"> #include &lt;wtf/text/StringBuilder.h&gt;
</span><span class="cx"> 
</span><span class="lines">@@ -36,41 +34,36 @@
</span><span class="cx"> {
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-void HTMLSourceTracker::startToken(SegmentedString&amp; currentInput, HTMLTokenizer&amp; tokenizer)
</del><ins>+void HTMLSourceTracker::start(SegmentedString&amp; currentInput, HTMLTokenizer* tokenizer, HTMLToken&amp; token)
</ins><span class="cx"> {
</span><del>-    if (!m_started) {
-        if (tokenizer.numberOfBufferedCharacters())
-            m_previousSource = tokenizer.bufferedCharacters();
-        else
-            m_previousSource.clear();
-        m_started = true;
</del><ins>+    if (token.type() == HTMLToken::Uninitialized) {
+        m_previousSource.clear();
+        if (tokenizer-&gt;numberOfBufferedCharacters())
+            m_previousSource = tokenizer-&gt;bufferedCharacters();
</ins><span class="cx">     } else
</span><span class="cx">         m_previousSource.append(m_currentSource);
</span><span class="cx"> 
</span><span class="cx">     m_currentSource = currentInput;
</span><del>-    m_tokenStart = m_currentSource.numberOfCharactersConsumed() - m_previousSource.length();
</del><ins>+    token.setBaseOffset(m_currentSource.numberOfCharactersConsumed() - m_previousSource.length());
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-void HTMLSourceTracker::endToken(SegmentedString&amp; currentInput, HTMLTokenizer&amp; tokenizer)
</del><ins>+void HTMLSourceTracker::end(SegmentedString&amp; currentInput, HTMLTokenizer* tokenizer, HTMLToken&amp; token)
</ins><span class="cx"> {
</span><del>-    ASSERT(m_started);
-    m_started = false;
-
-    m_tokenEnd = currentInput.numberOfCharactersConsumed() - tokenizer.numberOfBufferedCharacters();
</del><span class="cx">     m_cachedSourceForToken = String();
</span><ins>+
+    // FIXME: This work should really be done by the HTMLTokenizer.
+    token.setEndOffset(currentInput.numberOfCharactersConsumed() - tokenizer-&gt;numberOfBufferedCharacters());
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-String HTMLSourceTracker::source(const HTMLToken&amp; token)
</del><ins>+String HTMLSourceTracker::sourceForToken(const HTMLToken&amp; token)
</ins><span class="cx"> {
</span><del>-    ASSERT(!m_started);
-
</del><span class="cx">     if (token.type() == HTMLToken::EndOfFile)
</span><span class="cx">         return String(); // Hides the null character we use to mark the end of file.
</span><span class="cx"> 
</span><span class="cx">     if (!m_cachedSourceForToken.isEmpty())
</span><span class="cx">         return m_cachedSourceForToken;
</span><span class="cx"> 
</span><del>-    unsigned length = m_tokenEnd - m_tokenStart;
</del><ins>+    unsigned length = token.length();
</ins><span class="cx"> 
</span><span class="cx">     StringBuilder source;
</span><span class="cx">     source.reserveCapacity(length);
</span><span class="lines">@@ -90,9 +83,4 @@
</span><span class="cx">     return m_cachedSourceForToken;
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-String HTMLSourceTracker::source(const HTMLToken&amp; token, unsigned attributeStart, unsigned attributeEnd)
-{
-    return source(token).substring(attributeStart - m_tokenStart, attributeEnd - attributeStart);
</del><span class="cx"> }
</span><del>-
-}
</del></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLSourceTrackerh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLSourceTracker.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLSourceTracker.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLSourceTracker.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -1,6 +1,5 @@
</span><span class="cx"> /*
</span><span class="cx">  * Copyright (C) 2010 Adam Barth. All Rights Reserved.
</span><del>- * Copyright (C) 2015 Apple Inc. All rights reserved.
</del><span class="cx">  *
</span><span class="cx">  * Redistribution and use in source and binary forms, with or without
</span><span class="cx">  * modification, are permitted provided that the following conditions
</span><span class="lines">@@ -27,11 +26,11 @@
</span><span class="cx"> #ifndef HTMLSourceTracker_h
</span><span class="cx"> #define HTMLSourceTracker_h
</span><span class="cx"> 
</span><ins>+#include &quot;HTMLToken.h&quot;
</ins><span class="cx"> #include &quot;SegmentedString.h&quot;
</span><span class="cx"> 
</span><span class="cx"> namespace WebCore {
</span><span class="cx"> 
</span><del>-class HTMLToken;
</del><span class="cx"> class HTMLTokenizer;
</span><span class="cx"> 
</span><span class="cx"> class HTMLSourceTracker {
</span><span class="lines">@@ -39,18 +38,15 @@
</span><span class="cx"> public:
</span><span class="cx">     HTMLSourceTracker();
</span><span class="cx"> 
</span><del>-    void startToken(SegmentedString&amp;, HTMLTokenizer&amp;);
-    void endToken(SegmentedString&amp;, HTMLTokenizer&amp;);
</del><ins>+    // FIXME: Once we move &quot;end&quot; into HTMLTokenizer, rename &quot;start&quot; to
+    // something that makes it obvious that this method can be called multiple
+    // times.
+    void start(SegmentedString&amp;, HTMLTokenizer*, HTMLToken&amp;);
+    void end(SegmentedString&amp;, HTMLTokenizer*, HTMLToken&amp;);
</ins><span class="cx"> 
</span><del>-    String source(const HTMLToken&amp;);
-    String source(const HTMLToken&amp;, unsigned attributeStart, unsigned attributeEnd);
</del><ins>+    String sourceForToken(const HTMLToken&amp;);
</ins><span class="cx"> 
</span><span class="cx"> private:
</span><del>-    bool m_started { false };
-
-    unsigned m_tokenStart;
-    unsigned m_tokenEnd;
-
</del><span class="cx">     SegmentedString m_previousSource;
</span><span class="cx">     SegmentedString m_currentSource;
</span><span class="cx"> 
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLTokenh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLToken.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLToken.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLToken.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -53,12 +53,15 @@
</span><span class="cx">     };
</span><span class="cx"> 
</span><span class="cx">     struct Attribute {
</span><ins>+        struct Range {
+            unsigned start;
+            unsigned end;
+        };
+
+        Range nameRange;
+        Range valueRange;
</ins><span class="cx">         Vector&lt;UChar, 32&gt; name;
</span><span class="cx">         Vector&lt;UChar, 32&gt; value;
</span><del>-
-        // Used by HTMLSourceTracker.
-        unsigned startOffset;
-        unsigned endOffset;
</del><span class="cx">     };
</span><span class="cx"> 
</span><span class="cx">     typedef Vector&lt;Attribute, 10&gt; AttributeList;
</span><span class="lines">@@ -70,6 +73,11 @@
</span><span class="cx"> 
</span><span class="cx">     Type type() const;
</span><span class="cx"> 
</span><ins>+    // Used by HTMLSourceTracker.
+    void setBaseOffset(unsigned); // Base for attribute offsets, and the end of token offset.
+    void setEndOffset(unsigned);
+    unsigned length() const;
+
</ins><span class="cx">     // EndOfFile
</span><span class="cx"> 
</span><span class="cx">     void makeEndOfFile();
</span><span class="lines">@@ -105,10 +113,15 @@
</span><span class="cx">     void beginEndTag(LChar);
</span><span class="cx">     void beginEndTag(const Vector&lt;LChar, 32&gt;&amp;);
</span><span class="cx"> 
</span><del>-    void beginAttribute(unsigned offset);
</del><ins>+    void addNewAttribute();
+
+    void beginAttributeName(unsigned offset);
</ins><span class="cx">     void appendToAttributeName(UChar);
</span><ins>+    void endAttributeName(unsigned offset);
+
+    void beginAttributeValue(unsigned offset);
</ins><span class="cx">     void appendToAttributeValue(UChar);
</span><del>-    void endAttribute(unsigned offset);
</del><ins>+    void endAttributeValue(unsigned offset);
</ins><span class="cx"> 
</span><span class="cx">     void setSelfClosing();
</span><span class="cx"> 
</span><span class="lines">@@ -141,6 +154,9 @@
</span><span class="cx"> private:
</span><span class="cx">     Type m_type;
</span><span class="cx"> 
</span><ins>+    unsigned m_baseOffset;
+    unsigned m_length;
+
</ins><span class="cx">     DataVector m_data;
</span><span class="cx">     UChar m_data8BitCheck;
</span><span class="cx"> 
</span><span class="lines">@@ -156,9 +172,8 @@
</span><span class="cx"> const HTMLToken::Attribute* findAttribute(const Vector&lt;HTMLToken::Attribute&gt;&amp;, StringView name);
</span><span class="cx"> 
</span><span class="cx"> inline HTMLToken::HTMLToken()
</span><del>-    : m_type(Uninitialized)
-    , m_data8BitCheck(0)
</del><span class="cx"> {
</span><ins>+    clear();
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> inline void HTMLToken::clear()
</span><span class="lines">@@ -166,6 +181,9 @@
</span><span class="cx">     m_type = Uninitialized;
</span><span class="cx">     m_data.clear();
</span><span class="cx">     m_data8BitCheck = 0;
</span><ins>+
+    m_length = 0;
+    m_baseOffset = 0;
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> inline HTMLToken::Type HTMLToken::type() const
</span><span class="lines">@@ -179,6 +197,21 @@
</span><span class="cx">     m_type = EndOfFile;
</span><span class="cx"> }
</span><span class="cx"> 
</span><ins>+inline unsigned HTMLToken::length() const
+{
+    return m_length;
+}
+
+inline void HTMLToken::setBaseOffset(unsigned offset)
+{
+    m_baseOffset = offset;
+}
+
+inline void HTMLToken::setEndOffset(unsigned endOffset)
+{
+    m_length = endOffset - m_baseOffset;
+}
+
</ins><span class="cx"> inline const HTMLToken::DataVector&amp; HTMLToken::name() const
</span><span class="cx"> {
</span><span class="cx">     ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
</span><span class="lines">@@ -267,12 +300,9 @@
</span><span class="cx">     ASSERT(m_type == Uninitialized);
</span><span class="cx">     m_type = StartTag;
</span><span class="cx">     m_selfClosing = false;
</span><ins>+    m_currentAttribute = nullptr;
</ins><span class="cx">     m_attributes.clear();
</span><span class="cx"> 
</span><del>-#if !ASSERT_DISABLED
-    m_currentAttribute = nullptr;
-#endif
-
</del><span class="cx">     m_data.append(character);
</span><span class="cx">     m_data8BitCheck = character;
</span><span class="cx"> }
</span><span class="lines">@@ -282,12 +312,9 @@
</span><span class="cx">     ASSERT(m_type == Uninitialized);
</span><span class="cx">     m_type = EndTag;
</span><span class="cx">     m_selfClosing = false;
</span><ins>+    m_currentAttribute = nullptr;
</ins><span class="cx">     m_attributes.clear();
</span><span class="cx"> 
</span><del>-#if !ASSERT_DISABLED
-    m_currentAttribute = nullptr;
-#endif
-
</del><span class="cx">     m_data.append(character);
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="lines">@@ -296,41 +323,64 @@
</span><span class="cx">     ASSERT(m_type == Uninitialized);
</span><span class="cx">     m_type = EndTag;
</span><span class="cx">     m_selfClosing = false;
</span><ins>+    m_currentAttribute = nullptr;
</ins><span class="cx">     m_attributes.clear();
</span><span class="cx"> 
</span><ins>+    m_data.appendVector(characters);
+}
+
+inline void HTMLToken::addNewAttribute()
+{
+    ASSERT(m_type == StartTag || m_type == EndTag);
+    m_attributes.grow(m_attributes.size() + 1);
+    m_currentAttribute = &amp;m_attributes.last();
+
</ins><span class="cx"> #if !ASSERT_DISABLED
</span><del>-    m_currentAttribute = nullptr;
</del><ins>+    m_currentAttribute-&gt;nameRange.start = 0;
+    m_currentAttribute-&gt;nameRange.end = 0;
+    m_currentAttribute-&gt;valueRange.start = 0;
+    m_currentAttribute-&gt;valueRange.end = 0;
</ins><span class="cx"> #endif
</span><ins>+}
</ins><span class="cx"> 
</span><del>-    m_data.appendVector(characters);
</del><ins>+inline void HTMLToken::beginAttributeName(unsigned offset)
+{
+    ASSERT(offset);
+    ASSERT(!m_currentAttribute-&gt;nameRange.start);
+    m_currentAttribute-&gt;nameRange.start = offset - m_baseOffset;
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-inline void HTMLToken::beginAttribute(unsigned offset)
</del><ins>+inline void HTMLToken::endAttributeName(unsigned offset)
</ins><span class="cx"> {
</span><del>-    ASSERT(m_type == StartTag || m_type == EndTag);
</del><span class="cx">     ASSERT(offset);
</span><ins>+    ASSERT(m_currentAttribute-&gt;nameRange.start);
+    ASSERT(!m_currentAttribute-&gt;nameRange.end);
</ins><span class="cx"> 
</span><del>-    m_attributes.grow(m_attributes.size() + 1);
-    m_currentAttribute = &amp;m_attributes.last();
</del><ins>+    unsigned adjustedOffset = offset - m_baseOffset;
+    m_currentAttribute-&gt;nameRange.end = adjustedOffset;
</ins><span class="cx"> 
</span><del>-    m_currentAttribute-&gt;startOffset = offset;
</del><ins>+    // FIXME: Is this intentional? Why point the value at the end of the name?
+    m_currentAttribute-&gt;valueRange.start = adjustedOffset;
+    m_currentAttribute-&gt;valueRange.end = adjustedOffset;
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-inline void HTMLToken::endAttribute(unsigned offset)
</del><ins>+inline void HTMLToken::beginAttributeValue(unsigned offset)
</ins><span class="cx"> {
</span><span class="cx">     ASSERT(offset);
</span><del>-    ASSERT(m_currentAttribute);
-    m_currentAttribute-&gt;endOffset = offset;
-#if !ASSERT_DISABLED
-    m_currentAttribute = nullptr;
-#endif
</del><ins>+    m_currentAttribute-&gt;valueRange.start = offset - m_baseOffset;
</ins><span class="cx"> }
</span><span class="cx"> 
</span><ins>+inline void HTMLToken::endAttributeValue(unsigned offset)
+{
+    ASSERT(offset);
+    m_currentAttribute-&gt;valueRange.end = offset - m_baseOffset;
+}
+
</ins><span class="cx"> inline void HTMLToken::appendToAttributeName(UChar character)
</span><span class="cx"> {
</span><span class="cx">     ASSERT(character);
</span><span class="cx">     ASSERT(m_type == StartTag || m_type == EndTag);
</span><del>-    ASSERT(m_currentAttribute);
</del><ins>+    ASSERT(m_currentAttribute-&gt;nameRange.start);
</ins><span class="cx">     m_currentAttribute-&gt;name.append(character);
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="lines">@@ -338,7 +388,7 @@
</span><span class="cx"> {
</span><span class="cx">     ASSERT(character);
</span><span class="cx">     ASSERT(m_type == StartTag || m_type == EndTag);
</span><del>-    ASSERT(m_currentAttribute);
</del><ins>+    ASSERT(m_currentAttribute-&gt;valueRange.start);
</ins><span class="cx">     m_currentAttribute-&gt;value.append(character);
</span><span class="cx"> }
</span><span class="cx"> 
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLTokenizercpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLTokenizer.cpp (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLTokenizer.cpp        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLTokenizer.cpp        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -1,5 +1,5 @@
</span><span class="cx"> /*
</span><del>- * Copyright (C) 2008, 2015 Apple Inc. All Rights Reserved.
</del><ins>+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
</ins><span class="cx">  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
</span><span class="cx">  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
</span><span class="cx">  *
</span><span class="lines">@@ -29,9 +29,12 @@
</span><span class="cx"> #include &quot;HTMLTokenizer.h&quot;
</span><span class="cx"> 
</span><span class="cx"> #include &quot;HTMLEntityParser.h&quot;
</span><del>-#include &quot;HTMLNames.h&quot;
</del><ins>+#include &quot;HTMLTreeBuilder.h&quot;
</ins><span class="cx"> #include &quot;MarkupTokenizerInlines.h&quot;
</span><ins>+#include &quot;NotImplemented.h&quot;
</ins><span class="cx"> #include &lt;wtf/ASCIICType.h&gt;
</span><ins>+#include &lt;wtf/CurrentTime.h&gt;
+#include &lt;wtf/text/CString.h&gt;
</ins><span class="cx"> 
</span><span class="cx"> using namespace WTF;
</span><span class="cx"> 
</span><span class="lines">@@ -39,97 +42,66 @@
</span><span class="cx"> 
</span><span class="cx"> using namespace HTMLNames;
</span><span class="cx"> 
</span><del>-static inline LChar convertASCIIAlphaToLower(UChar character)
</del><ins>+static inline UChar toLowerCase(UChar cc)
</ins><span class="cx"> {
</span><del>-    ASSERT(isASCIIAlpha(character));
-    return toASCIILowerUnchecked(character);
</del><ins>+    ASSERT(isASCIIUpper(cc));
+    const int lowerCaseOffset = 0x20;
+    return cc + lowerCaseOffset;
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-static inline bool vectorEqualsString(const Vector&lt;LChar, 32&gt;&amp; vector, const char* string)
</del><ins>+static inline bool vectorEqualsString(const Vector&lt;LChar, 32&gt;&amp; vector, const String&amp; string)
</ins><span class="cx"> {
</span><del>-    unsigned size = vector.size();
-    for (unsigned i = 0; i &lt; size; ++i) {
-        if (!string[i] || vector[i] != string[i])
-            return false;
-    }
-    return !string[size];
</del><ins>+    if (vector.size() != string.length())
+        return false;
+
+    if (!string.length())
+        return true;
+
+    return equal(string.impl(), vector.data(), vector.size());
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-inline bool HTMLTokenizer::inEndTagBufferingState() const
</del><ins>+static inline bool isEndTagBufferingState(HTMLTokenizer::State state)
</ins><span class="cx"> {
</span><del>-    switch (m_state) {
-    case RCDATAEndTagOpenState:
-    case RCDATAEndTagNameState:
-    case RAWTEXTEndTagOpenState:
-    case RAWTEXTEndTagNameState:
-    case ScriptDataEndTagOpenState:
-    case ScriptDataEndTagNameState:
-    case ScriptDataEscapedEndTagOpenState:
-    case ScriptDataEscapedEndTagNameState:
</del><ins>+    switch (state) {
+    case HTMLTokenizer::RCDATAEndTagOpenState:
+    case HTMLTokenizer::RCDATAEndTagNameState:
+    case HTMLTokenizer::RAWTEXTEndTagOpenState:
+    case HTMLTokenizer::RAWTEXTEndTagNameState:
+    case HTMLTokenizer::ScriptDataEndTagOpenState:
+    case HTMLTokenizer::ScriptDataEndTagNameState:
+    case HTMLTokenizer::ScriptDataEscapedEndTagOpenState:
+    case HTMLTokenizer::ScriptDataEscapedEndTagNameState:
</ins><span class="cx">         return true;
</span><span class="cx">     default:
</span><span class="cx">         return false;
</span><span class="cx">     }
</span><span class="cx"> }
</span><span class="cx"> 
</span><ins>+#define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName)
+#define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName)
+#define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName)
+#define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName)
+
</ins><span class="cx"> HTMLTokenizer::HTMLTokenizer(const HTMLParserOptions&amp; options)
</span><del>-    : m_preprocessor(*this)
</del><ins>+    : m_inputStreamPreprocessor(this)
</ins><span class="cx">     , m_options(options)
</span><span class="cx"> {
</span><ins>+    reset();
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-inline void HTMLTokenizer::bufferASCIICharacter(UChar character)
</del><ins>+HTMLTokenizer::~HTMLTokenizer()
</ins><span class="cx"> {
</span><del>-    ASSERT(character != kEndOfFileMarker);
-    ASSERT(isASCII(character));
-    LChar narrowedCharacter = character;
-    m_token.appendToCharacter(narrowedCharacter);
</del><span class="cx"> }
</span><span class="cx"> 
</span><del>-inline void HTMLTokenizer::bufferCharacter(UChar character)
</del><ins>+void HTMLTokenizer::reset()
</ins><span class="cx"> {
</span><del>-    ASSERT(character != kEndOfFileMarker);
-    m_token.appendToCharacter(character);
</del><ins>+    m_state = HTMLTokenizer::DataState;
+    m_token = 0;
+    m_forceNullCharacterReplacement = false;
+    m_shouldAllowCDATA = false;
+    m_additionalAllowedCharacter = '\0';
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-inline bool HTMLTokenizer::emitAndResumeInDataState(SegmentedString&amp; source)
-{
-    saveEndTagNameIfNeeded();
-    m_state = DataState;
-    source.advanceAndUpdateLineNumber();
-    return true;
-}
-
-inline bool HTMLTokenizer::emitAndReconsumeInDataState()
-{
-    saveEndTagNameIfNeeded();
-    m_state = DataState;
-    return true;
-}
-
-inline bool HTMLTokenizer::emitEndOfFile(SegmentedString&amp; source)
-{
-    m_state = DataState;
-    if (haveBufferedCharacterToken())
-        return true;
-    source.advance();
-    m_token.clear();
-    m_token.makeEndOfFile();
-    return true;
-}
-
-inline void HTMLTokenizer::saveEndTagNameIfNeeded()
-{
-    ASSERT(m_token.type() != HTMLToken::Uninitialized);
-    if (m_token.type() == HTMLToken::StartTag)
-        m_appropriateEndTagName = m_token.name();
-}
-
-inline bool HTMLTokenizer::haveBufferedCharacterToken() const
-{
-    return m_token.type() == HTMLToken::Character;
-}
-
</del><span class="cx"> inline bool HTMLTokenizer::processEntity(SegmentedString&amp; source)
</span><span class="cx"> {
</span><span class="cx">     bool notEnoughCharacters = false;
</span><span class="lines">@@ -147,1246 +119,1426 @@
</span><span class="cx">     return true;
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-void HTMLTokenizer::flushBufferedEndTag()
</del><ins>+bool HTMLTokenizer::flushBufferedEndTag(SegmentedString&amp; source)
</ins><span class="cx"> {
</span><del>-    m_token.beginEndTag(m_bufferedEndTagName);
</del><ins>+    ASSERT(m_token-&gt;type() == HTMLToken::Character || m_token-&gt;type() == HTMLToken::Uninitialized);
+    source.advanceAndUpdateLineNumber();
+    if (m_token-&gt;type() == HTMLToken::Character)
+        return true;
+    m_token-&gt;beginEndTag(m_bufferedEndTagName);
</ins><span class="cx">     m_bufferedEndTagName.clear();
</span><span class="cx">     m_appropriateEndTagName.clear();
</span><span class="cx">     m_temporaryBuffer.clear();
</span><ins>+    return false;
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-bool HTMLTokenizer::commitToPartialEndTag(SegmentedString&amp; source, UChar character, State state)
-{
-    ASSERT(source.currentChar() == character);
-    appendToTemporaryBuffer(character);
-    source.advanceAndUpdateLineNumber();
</del><ins>+#define FLUSH_AND_ADVANCE_TO(stateName)                                    \
+    do {                                                                   \
+        m_state = HTMLTokenizer::stateName;                           \
+        if (flushBufferedEndTag(source))                                   \
+            return true;                                                   \
+        if (source.isEmpty()                                               \
+            || !m_inputStreamPreprocessor.peek(source))                    \
+            return haveBufferedCharacterToken();                           \
+        cc = m_inputStreamPreprocessor.nextInputCharacter();               \
+        goto stateName;                                                    \
+    } while (false)
</ins><span class="cx"> 
</span><del>-    if (haveBufferedCharacterToken()) {
-        // Emit the buffered character token.
-        // The next call to processToken will flush the buffered end tag and continue parsing it.
-        m_state = state;
-        return true;
-    }
-
-    flushBufferedEndTag();
-    return false;
-}
-
-bool HTMLTokenizer::commitToCompleteEndTag(SegmentedString&amp; source)
</del><ins>+bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString&amp; source, HTMLTokenizer::State state)
</ins><span class="cx"> {
</span><del>-    ASSERT(source.currentChar() == '&gt;');
-    appendToTemporaryBuffer('&gt;');
-    source.advance();
-
-    m_state = DataState;
-
-    if (haveBufferedCharacterToken()) {
-        // Emit the character token we already have.
-        // The next call to processToken will flush the buffered end tag and emit it.
-        return true;
-    }
-
-    flushBufferedEndTag();
</del><ins>+    m_state = state;
+    flushBufferedEndTag(source);
</ins><span class="cx">     return true;
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-bool HTMLTokenizer::processToken(SegmentedString&amp; source)
</del><ins>+bool HTMLTokenizer::nextToken(SegmentedString&amp; source, HTMLToken&amp; token)
</ins><span class="cx"> {
</span><del>-    if (!m_bufferedEndTagName.isEmpty() &amp;&amp; !inEndTagBufferingState()) {
-        // We are back here after emitting a character token that came just before an end tag.
-        // To continue parsing the end tag we need to move the buffered tag name into the token.
-        flushBufferedEndTag();
</del><ins>+    // If we have a token in progress, then we're supposed to be called back
+    // with the same token so we can finish it.
+    ASSERT(!m_token || m_token == &amp;token || token.type() == HTMLToken::Uninitialized);
+    m_token = &amp;token;
</ins><span class="cx"> 
</span><del>-        // If we are in the data state, the end tag is already complete and we should emit it
-        // now, otherwise, we want to resume parsing the partial end tag.
-        if (m_state == DataState)
</del><ins>+    if (!m_bufferedEndTagName.isEmpty() &amp;&amp; !isEndTagBufferingState(m_state)) {
+        // FIXME: This should call flushBufferedEndTag().
+        // We started an end tag during our last iteration.
+        m_token-&gt;beginEndTag(m_bufferedEndTagName);
+        m_bufferedEndTagName.clear();
+        m_appropriateEndTagName.clear();
+        m_temporaryBuffer.clear();
+        if (m_state == HTMLTokenizer::DataState) {
+            // We're back in the data state, so we must be done with the tag.
</ins><span class="cx">             return true;
</span><ins>+        }
</ins><span class="cx">     }
</span><span class="cx"> 
</span><del>-    if (!m_preprocessor.peek(source, isNullCharacterSkippingState(m_state)))
</del><ins>+    if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
</ins><span class="cx">         return haveBufferedCharacterToken();
</span><del>-    UChar character = m_preprocessor.nextInputCharacter();
</del><ins>+    UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
</ins><span class="cx"> 
</span><del>-    // https://html.spec.whatwg.org/#tokenization
</del><ins>+    // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
</ins><span class="cx">     switch (m_state) {
</span><del>-
-    BEGIN_STATE(DataState)
-        if (character == '&amp;')
-            ADVANCE_TO(CharacterReferenceInDataState);
-        if (character == '&lt;') {
-            if (haveBufferedCharacterToken())
-                RETURN_IN_CURRENT_STATE(true);
-            ADVANCE_TO(TagOpenState);
</del><ins>+    HTML_BEGIN_STATE(DataState) {
+        if (cc == '&amp;')
+            HTML_ADVANCE_TO(CharacterReferenceInDataState);
+        else if (cc == '&lt;') {
+            if (m_token-&gt;type() == HTMLToken::Character) {
+                // We have a bunch of character tokens queued up that we
+                // are emitting lazily here.
+                return true;
+            }
+            HTML_ADVANCE_TO(TagOpenState);
+        } else if (cc == kEndOfFileMarker)
+            return emitEndOfFile(source);
+        else {
+            bufferCharacter(cc);
+            HTML_ADVANCE_TO(DataState);
</ins><span class="cx">         }
</span><del>-        if (character == kEndOfFileMarker)
-            return emitEndOfFile(source);
-        bufferCharacter(character);
-        ADVANCE_TO(DataState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(CharacterReferenceInDataState)
</del><ins>+    HTML_BEGIN_STATE(CharacterReferenceInDataState) {
</ins><span class="cx">         if (!processEntity(source))
</span><del>-            RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
-        SWITCH_TO(DataState);
</del><ins>+            return haveBufferedCharacterToken();
+        HTML_SWITCH_TO(DataState);
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(RCDATAState)
-        if (character == '&amp;')
-            ADVANCE_TO(CharacterReferenceInRCDATAState);
-        if (character == '&lt;')
-            ADVANCE_TO(RCDATALessThanSignState);
-        if (character == kEndOfFileMarker)
-            RECONSUME_IN(DataState);
-        bufferCharacter(character);
-        ADVANCE_TO(RCDATAState);
</del><ins>+    HTML_BEGIN_STATE(RCDATAState) {
+        if (cc == '&amp;')
+            HTML_ADVANCE_TO(CharacterReferenceInRCDATAState);
+        else if (cc == '&lt;')
+            HTML_ADVANCE_TO(RCDATALessThanSignState);
+        else if (cc == kEndOfFileMarker)
+            return emitEndOfFile(source);
+        else {
+            bufferCharacter(cc);
+            HTML_ADVANCE_TO(RCDATAState);
+        }
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(CharacterReferenceInRCDATAState)
</del><ins>+    HTML_BEGIN_STATE(CharacterReferenceInRCDATAState) {
</ins><span class="cx">         if (!processEntity(source))
</span><del>-            RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
-        SWITCH_TO(RCDATAState);
</del><ins>+            return haveBufferedCharacterToken();
+        HTML_SWITCH_TO(RCDATAState);
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(RAWTEXTState)
-        if (character == '&lt;')
-            ADVANCE_TO(RAWTEXTLessThanSignState);
-        if (character == kEndOfFileMarker)
-            RECONSUME_IN(DataState);
-        bufferCharacter(character);
-        ADVANCE_TO(RAWTEXTState);
</del><ins>+    HTML_BEGIN_STATE(RAWTEXTState) {
+        if (cc == '&lt;')
+            HTML_ADVANCE_TO(RAWTEXTLessThanSignState);
+        else if (cc == kEndOfFileMarker)
+            return emitEndOfFile(source);
+        else {
+            bufferCharacter(cc);
+            HTML_ADVANCE_TO(RAWTEXTState);
+        }
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataState)
-        if (character == '&lt;')
-            ADVANCE_TO(ScriptDataLessThanSignState);
-        if (character == kEndOfFileMarker)
-            RECONSUME_IN(DataState);
-        bufferCharacter(character);
-        ADVANCE_TO(ScriptDataState);
</del><ins>+    HTML_BEGIN_STATE(ScriptDataState) {
+        if (cc == '&lt;')
+            HTML_ADVANCE_TO(ScriptDataLessThanSignState);
+        else if (cc == kEndOfFileMarker)
+            return emitEndOfFile(source);
+        else {
+            bufferCharacter(cc);
+            HTML_ADVANCE_TO(ScriptDataState);
+        }
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(PLAINTEXTState)
-        if (character == kEndOfFileMarker)
-            RECONSUME_IN(DataState);
-        bufferCharacter(character);
-        ADVANCE_TO(PLAINTEXTState);
</del><ins>+    HTML_BEGIN_STATE(PLAINTEXTState) {
+        if (cc == kEndOfFileMarker)
+            return emitEndOfFile(source);
+        bufferCharacter(cc);
+        HTML_ADVANCE_TO(PLAINTEXTState);
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(TagOpenState)
-        if (character == '!')
-            ADVANCE_TO(MarkupDeclarationOpenState);
-        if (character == '/')
-            ADVANCE_TO(EndTagOpenState);
-        if (isASCIIAlpha(character)) {
-            m_token.beginStartTag(convertASCIIAlphaToLower(character));
-            ADVANCE_TO(TagNameState);
-        }
-        if (character == '?') {
</del><ins>+    HTML_BEGIN_STATE(TagOpenState) {
+        if (cc == '!')
+            HTML_ADVANCE_TO(MarkupDeclarationOpenState);
+        else if (cc == '/')
+            HTML_ADVANCE_TO(EndTagOpenState);
+        else if (isASCIIUpper(cc)) {
+            m_token-&gt;beginStartTag(toLowerCase(cc));
+            HTML_ADVANCE_TO(TagNameState);
+        } else if (isASCIILower(cc)) {
+            m_token-&gt;beginStartTag(cc);
+            HTML_ADVANCE_TO(TagNameState);
+        } else if (cc == '?') {
</ins><span class="cx">             parseError();
</span><span class="cx">             // The spec consumes the current character before switching
</span><span class="cx">             // to the bogus comment state, but it's easier to implement
</span><span class="cx">             // if we reconsume the current character.
</span><del>-            RECONSUME_IN(BogusCommentState);
</del><ins>+            HTML_RECONSUME_IN(BogusCommentState);
+        } else {
+            parseError();
+            bufferASCIICharacter('&lt;');
+            HTML_RECONSUME_IN(DataState);
</ins><span class="cx">         }
</span><del>-        parseError();
-        bufferASCIICharacter('&lt;');
-        RECONSUME_IN(DataState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(EndTagOpenState)
-        if (isASCIIAlpha(character)) {
-            m_token.beginEndTag(convertASCIIAlphaToLower(character));
</del><ins>+    HTML_BEGIN_STATE(EndTagOpenState) {
+        if (isASCIIUpper(cc)) {
+            m_token-&gt;beginEndTag(static_cast&lt;LChar&gt;(toLowerCase(cc)));
</ins><span class="cx">             m_appropriateEndTagName.clear();
</span><del>-            ADVANCE_TO(TagNameState);
-        }
-        if (character == '&gt;') {
</del><ins>+            HTML_ADVANCE_TO(TagNameState);
+        } else if (isASCIILower(cc)) {
+            m_token-&gt;beginEndTag(static_cast&lt;LChar&gt;(cc));
+            m_appropriateEndTagName.clear();
+            HTML_ADVANCE_TO(TagNameState);
+        } else if (cc == '&gt;') {
</ins><span class="cx">             parseError();
</span><del>-            ADVANCE_TO(DataState);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            HTML_ADVANCE_TO(DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><span class="cx">             bufferASCIICharacter('&lt;');
</span><span class="cx">             bufferASCIICharacter('/');
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            HTML_RECONSUME_IN(DataState);
+        } else {
+            parseError();
+            HTML_RECONSUME_IN(BogusCommentState);
</ins><span class="cx">         }
</span><del>-        parseError();
-        RECONSUME_IN(BogusCommentState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(TagNameState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(BeforeAttributeNameState);
-        if (character == '/')
-            ADVANCE_TO(SelfClosingStartTagState);
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (m_options.usePreHTML5ParserQuirks &amp;&amp; character == '&lt;')
-            return emitAndReconsumeInDataState();
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(TagNameState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(BeforeAttributeNameState);
+        else if (cc == '/')
+            HTML_ADVANCE_TO(SelfClosingStartTagState);
+        else if (cc == '&gt;')
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        else if (m_options.usePreHTML5ParserQuirks &amp;&amp; cc == '&lt;')
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        else if (isASCIIUpper(cc)) {
+            m_token-&gt;appendToName(toLowerCase(cc));
+            HTML_ADVANCE_TO(TagNameState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            HTML_RECONSUME_IN(DataState);
+        } else {
+            m_token-&gt;appendToName(cc);
+            HTML_ADVANCE_TO(TagNameState);
</ins><span class="cx">         }
</span><del>-        m_token.appendToName(toASCIILower(character));
-        ADVANCE_TO(TagNameState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(RCDATALessThanSignState)
-        if (character == '/') {
</del><ins>+    HTML_BEGIN_STATE(RCDATALessThanSignState) {
+        if (cc == '/') {
</ins><span class="cx">             m_temporaryBuffer.clear();
</span><span class="cx">             ASSERT(m_bufferedEndTagName.isEmpty());
</span><del>-            ADVANCE_TO(RCDATAEndTagOpenState);
</del><ins>+            HTML_ADVANCE_TO(RCDATAEndTagOpenState);
+        } else {
+            bufferASCIICharacter('&lt;');
+            HTML_RECONSUME_IN(RCDATAState);
</ins><span class="cx">         }
</span><del>-        bufferASCIICharacter('&lt;');
-        RECONSUME_IN(RCDATAState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(RCDATAEndTagOpenState)
-        if (isASCIIAlpha(character)) {
-            appendToTemporaryBuffer(character);
-            appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_TO(RCDATAEndTagNameState);
</del><ins>+    HTML_BEGIN_STATE(RCDATAEndTagOpenState) {
+        if (isASCIIUpper(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(toLowerCase(cc)));
+            HTML_ADVANCE_TO(RCDATAEndTagNameState);
+        } else if (isASCIILower(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(cc));
+            HTML_ADVANCE_TO(RCDATAEndTagNameState);
+        } else {
+            bufferASCIICharacter('&lt;');
+            bufferASCIICharacter('/');
+            HTML_RECONSUME_IN(RCDATAState);
</ins><span class="cx">         }
</span><del>-        bufferASCIICharacter('&lt;');
-        bufferASCIICharacter('/');
-        RECONSUME_IN(RCDATAState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(RCDATAEndTagNameState)
-        if (isASCIIAlpha(character)) {
-            appendToTemporaryBuffer(character);
-            appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_TO(RCDATAEndTagNameState);
-        }
-        if (isTokenizerWhitespace(character)) {
-            if (isAppropriateEndTag()) {
-                if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
-                    return true;
-                SWITCH_TO(BeforeAttributeNameState);
</del><ins>+    HTML_BEGIN_STATE(RCDATAEndTagNameState) {
+        if (isASCIIUpper(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(toLowerCase(cc)));
+            HTML_ADVANCE_TO(RCDATAEndTagNameState);
+        } else if (isASCIILower(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(cc));
+            HTML_ADVANCE_TO(RCDATAEndTagNameState);
+        } else {
+            if (isTokenizerWhitespace(cc)) {
+                if (isAppropriateEndTag()) {
+                    m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+                    FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+                }
+            } else if (cc == '/') {
+                if (isAppropriateEndTag()) {
+                    m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+                    FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+                }
+            } else if (cc == '&gt;') {
+                if (isAppropriateEndTag()) {
+                    m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+                    return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
+                }
</ins><span class="cx">             }
</span><del>-        } else if (character == '/') {
-            if (isAppropriateEndTag()) {
-                if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
-                    return true;
-                SWITCH_TO(SelfClosingStartTagState);
-            }
-        } else if (character == '&gt;') {
-            if (isAppropriateEndTag())
-                return commitToCompleteEndTag(source);
</del><ins>+            bufferASCIICharacter('&lt;');
+            bufferASCIICharacter('/');
+            m_token-&gt;appendToCharacter(m_temporaryBuffer);
+            m_bufferedEndTagName.clear();
+            m_temporaryBuffer.clear();
+            HTML_RECONSUME_IN(RCDATAState);
</ins><span class="cx">         }
</span><del>-        bufferASCIICharacter('&lt;');
-        bufferASCIICharacter('/');
-        m_token.appendToCharacter(m_temporaryBuffer);
-        m_bufferedEndTagName.clear();
-        m_temporaryBuffer.clear();
-        RECONSUME_IN(RCDATAState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(RAWTEXTLessThanSignState)
-        if (character == '/') {
</del><ins>+    HTML_BEGIN_STATE(RAWTEXTLessThanSignState) {
+        if (cc == '/') {
</ins><span class="cx">             m_temporaryBuffer.clear();
</span><span class="cx">             ASSERT(m_bufferedEndTagName.isEmpty());
</span><del>-            ADVANCE_TO(RAWTEXTEndTagOpenState);
</del><ins>+            HTML_ADVANCE_TO(RAWTEXTEndTagOpenState);
+        } else {
+            bufferASCIICharacter('&lt;');
+            HTML_RECONSUME_IN(RAWTEXTState);
</ins><span class="cx">         }
</span><del>-        bufferASCIICharacter('&lt;');
-        RECONSUME_IN(RAWTEXTState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(RAWTEXTEndTagOpenState)
-        if (isASCIIAlpha(character)) {
-            appendToTemporaryBuffer(character);
-            appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_TO(RAWTEXTEndTagNameState);
</del><ins>+    HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) {
+        if (isASCIIUpper(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(toLowerCase(cc)));
+            HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
+        } else if (isASCIILower(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(cc));
+            HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
+        } else {
+            bufferASCIICharacter('&lt;');
+            bufferASCIICharacter('/');
+            HTML_RECONSUME_IN(RAWTEXTState);
</ins><span class="cx">         }
</span><del>-        bufferASCIICharacter('&lt;');
-        bufferASCIICharacter('/');
-        RECONSUME_IN(RAWTEXTState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(RAWTEXTEndTagNameState)
-        if (isASCIIAlpha(character)) {
-            appendToTemporaryBuffer(character);
-            appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_TO(RAWTEXTEndTagNameState);
-        }
-        if (isTokenizerWhitespace(character)) {
-            if (isAppropriateEndTag()) {
-                if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
-                    return true;
-                SWITCH_TO(BeforeAttributeNameState);
</del><ins>+    HTML_BEGIN_STATE(RAWTEXTEndTagNameState) {
+        if (isASCIIUpper(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(toLowerCase(cc)));
+            HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
+        } else if (isASCIILower(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(cc));
+            HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
+        } else {
+            if (isTokenizerWhitespace(cc)) {
+                if (isAppropriateEndTag()) {
+                    m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+                    FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+                }
+            } else if (cc == '/') {
+                if (isAppropriateEndTag()) {
+                    m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+                    FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+                }
+            } else if (cc == '&gt;') {
+                if (isAppropriateEndTag()) {
+                    m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+                    return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
+                }
</ins><span class="cx">             }
</span><del>-        } else if (character == '/') {
-            if (isAppropriateEndTag()) {
-                if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
-                    return true;
-                SWITCH_TO(SelfClosingStartTagState);
-            }
-        } else if (character == '&gt;') {
-            if (isAppropriateEndTag())
-                return commitToCompleteEndTag(source);
</del><ins>+            bufferASCIICharacter('&lt;');
+            bufferASCIICharacter('/');
+            m_token-&gt;appendToCharacter(m_temporaryBuffer);
+            m_bufferedEndTagName.clear();
+            m_temporaryBuffer.clear();
+            HTML_RECONSUME_IN(RAWTEXTState);
</ins><span class="cx">         }
</span><del>-        bufferASCIICharacter('&lt;');
-        bufferASCIICharacter('/');
-        m_token.appendToCharacter(m_temporaryBuffer);
-        m_bufferedEndTagName.clear();
-        m_temporaryBuffer.clear();
-        RECONSUME_IN(RAWTEXTState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataLessThanSignState)
-        if (character == '/') {
</del><ins>+    HTML_BEGIN_STATE(ScriptDataLessThanSignState) {
+        if (cc == '/') {
</ins><span class="cx">             m_temporaryBuffer.clear();
</span><span class="cx">             ASSERT(m_bufferedEndTagName.isEmpty());
</span><del>-            ADVANCE_TO(ScriptDataEndTagOpenState);
-        }
-        if (character == '!') {
</del><ins>+            HTML_ADVANCE_TO(ScriptDataEndTagOpenState);
+        } else if (cc == '!') {
</ins><span class="cx">             bufferASCIICharacter('&lt;');
</span><span class="cx">             bufferASCIICharacter('!');
</span><del>-            ADVANCE_TO(ScriptDataEscapeStartState);
</del><ins>+            HTML_ADVANCE_TO(ScriptDataEscapeStartState);
+        } else {
+            bufferASCIICharacter('&lt;');
+            HTML_RECONSUME_IN(ScriptDataState);
</ins><span class="cx">         }
</span><del>-        bufferASCIICharacter('&lt;');
-        RECONSUME_IN(ScriptDataState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataEndTagOpenState)
-        if (isASCIIAlpha(character)) {
-            appendToTemporaryBuffer(character);
-            appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_TO(ScriptDataEndTagNameState);
</del><ins>+    HTML_BEGIN_STATE(ScriptDataEndTagOpenState) {
+        if (isASCIIUpper(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(toLowerCase(cc)));
+            HTML_ADVANCE_TO(ScriptDataEndTagNameState);
+        } else if (isASCIILower(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(cc));
+            HTML_ADVANCE_TO(ScriptDataEndTagNameState);
+        } else {
+            bufferASCIICharacter('&lt;');
+            bufferASCIICharacter('/');
+            HTML_RECONSUME_IN(ScriptDataState);
</ins><span class="cx">         }
</span><del>-        bufferASCIICharacter('&lt;');
-        bufferASCIICharacter('/');
-        RECONSUME_IN(ScriptDataState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataEndTagNameState)
-        if (isASCIIAlpha(character)) {
-            appendToTemporaryBuffer(character);
-            appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_TO(ScriptDataEndTagNameState);
-        }
-        if (isTokenizerWhitespace(character)) {
-            if (isAppropriateEndTag()) {
-                if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
-                    return true;
-                SWITCH_TO(BeforeAttributeNameState);
</del><ins>+    HTML_BEGIN_STATE(ScriptDataEndTagNameState) {
+        if (isASCIIUpper(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(toLowerCase(cc)));
+            HTML_ADVANCE_TO(ScriptDataEndTagNameState);
+        } else if (isASCIILower(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(cc));
+            HTML_ADVANCE_TO(ScriptDataEndTagNameState);
+        } else {
+            if (isTokenizerWhitespace(cc)) {
+                if (isAppropriateEndTag()) {
+                    m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+                    FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+                }
+            } else if (cc == '/') {
+                if (isAppropriateEndTag()) {
+                    m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+                    FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+                }
+            } else if (cc == '&gt;') {
+                if (isAppropriateEndTag()) {
+                    m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+                    return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
+                }
</ins><span class="cx">             }
</span><del>-        } else if (character == '/') {
-            if (isAppropriateEndTag()) {
-                if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
-                    return true;
-                SWITCH_TO(SelfClosingStartTagState);
-            }
-        } else if (character == '&gt;') {
-            if (isAppropriateEndTag())
-                return commitToCompleteEndTag(source);
</del><ins>+            bufferASCIICharacter('&lt;');
+            bufferASCIICharacter('/');
+            m_token-&gt;appendToCharacter(m_temporaryBuffer);
+            m_bufferedEndTagName.clear();
+            m_temporaryBuffer.clear();
+            HTML_RECONSUME_IN(ScriptDataState);
</ins><span class="cx">         }
</span><del>-        bufferASCIICharacter('&lt;');
-        bufferASCIICharacter('/');
-        m_token.appendToCharacter(m_temporaryBuffer);
-        m_bufferedEndTagName.clear();
-        m_temporaryBuffer.clear();
-        RECONSUME_IN(ScriptDataState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataEscapeStartState)
-        if (character == '-') {
</del><ins>+    HTML_BEGIN_STATE(ScriptDataEscapeStartState) {
+        if (cc == '-') {
</ins><span class="cx">             bufferASCIICharacter('-');
</span><del>-            ADVANCE_TO(ScriptDataEscapeStartDashState);
</del><ins>+            HTML_ADVANCE_TO(ScriptDataEscapeStartDashState);
</ins><span class="cx">         } else
</span><del>-            RECONSUME_IN(ScriptDataState);
</del><ins>+            HTML_RECONSUME_IN(ScriptDataState);
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataEscapeStartDashState)
-        if (character == '-') {
</del><ins>+    HTML_BEGIN_STATE(ScriptDataEscapeStartDashState) {
+        if (cc == '-') {
</ins><span class="cx">             bufferASCIICharacter('-');
</span><del>-            ADVANCE_TO(ScriptDataEscapedDashDashState);
</del><ins>+            HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
</ins><span class="cx">         } else
</span><del>-            RECONSUME_IN(ScriptDataState);
</del><ins>+            HTML_RECONSUME_IN(ScriptDataState);
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataEscapedState)
-        if (character == '-') {
</del><ins>+    HTML_BEGIN_STATE(ScriptDataEscapedState) {
+        if (cc == '-') {
</ins><span class="cx">             bufferASCIICharacter('-');
</span><del>-            ADVANCE_TO(ScriptDataEscapedDashState);
-        }
-        if (character == '&lt;')
-            ADVANCE_TO(ScriptDataEscapedLessThanSignState);
-        if (character == kEndOfFileMarker) {
</del><ins>+            HTML_ADVANCE_TO(ScriptDataEscapedDashState);
+        } else if (cc == '&lt;')
+            HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
+        else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            HTML_RECONSUME_IN(DataState);
+        } else {
+            bufferCharacter(cc);
+            HTML_ADVANCE_TO(ScriptDataEscapedState);
</ins><span class="cx">         }
</span><del>-        bufferCharacter(character);
-        ADVANCE_TO(ScriptDataEscapedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataEscapedDashState)
-        if (character == '-') {
</del><ins>+    HTML_BEGIN_STATE(ScriptDataEscapedDashState) {
+        if (cc == '-') {
</ins><span class="cx">             bufferASCIICharacter('-');
</span><del>-            ADVANCE_TO(ScriptDataEscapedDashDashState);
-        }
-        if (character == '&lt;')
-            ADVANCE_TO(ScriptDataEscapedLessThanSignState);
-        if (character == kEndOfFileMarker) {
</del><ins>+            HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
+        } else if (cc == '&lt;')
+            HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
+        else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            HTML_RECONSUME_IN(DataState);
+        } else {
+            bufferCharacter(cc);
+            HTML_ADVANCE_TO(ScriptDataEscapedState);
</ins><span class="cx">         }
</span><del>-        bufferCharacter(character);
-        ADVANCE_TO(ScriptDataEscapedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataEscapedDashDashState)
-        if (character == '-') {
</del><ins>+    HTML_BEGIN_STATE(ScriptDataEscapedDashDashState) {
+        if (cc == '-') {
</ins><span class="cx">             bufferASCIICharacter('-');
</span><del>-            ADVANCE_TO(ScriptDataEscapedDashDashState);
-        }
-        if (character == '&lt;')
-            ADVANCE_TO(ScriptDataEscapedLessThanSignState);
-        if (character == '&gt;') {
</del><ins>+            HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
+        } else if (cc == '&lt;')
+            HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
+        else if (cc == '&gt;') {
</ins><span class="cx">             bufferASCIICharacter('&gt;');
</span><del>-            ADVANCE_TO(ScriptDataState);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            HTML_ADVANCE_TO(ScriptDataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            HTML_RECONSUME_IN(DataState);
+        } else {
+            bufferCharacter(cc);
+            HTML_ADVANCE_TO(ScriptDataEscapedState);
</ins><span class="cx">         }
</span><del>-        bufferCharacter(character);
-        ADVANCE_TO(ScriptDataEscapedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataEscapedLessThanSignState)
-        if (character == '/') {
</del><ins>+    HTML_BEGIN_STATE(ScriptDataEscapedLessThanSignState) {
+        if (cc == '/') {
</ins><span class="cx">             m_temporaryBuffer.clear();
</span><span class="cx">             ASSERT(m_bufferedEndTagName.isEmpty());
</span><del>-            ADVANCE_TO(ScriptDataEscapedEndTagOpenState);
-        }
-        if (isASCIIAlpha(character)) {
</del><ins>+            HTML_ADVANCE_TO(ScriptDataEscapedEndTagOpenState);
+        } else if (isASCIIUpper(cc)) {
</ins><span class="cx">             bufferASCIICharacter('&lt;');
</span><del>-            bufferASCIICharacter(character);
</del><ins>+            bufferASCIICharacter(cc);
</ins><span class="cx">             m_temporaryBuffer.clear();
</span><del>-            appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
-            ADVANCE_TO(ScriptDataDoubleEscapeStartState);
</del><ins>+            m_temporaryBuffer.append(toLowerCase(cc));
+            HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+        } else if (isASCIILower(cc)) {
+            bufferASCIICharacter('&lt;');
+            bufferASCIICharacter(cc);
+            m_temporaryBuffer.clear();
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+        } else {
+            bufferASCIICharacter('&lt;');
+            HTML_RECONSUME_IN(ScriptDataEscapedState);
</ins><span class="cx">         }
</span><del>-        bufferASCIICharacter('&lt;');
-        RECONSUME_IN(ScriptDataEscapedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataEscapedEndTagOpenState)
-        if (isASCIIAlpha(character)) {
-            appendToTemporaryBuffer(character);
-            appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_TO(ScriptDataEscapedEndTagNameState);
</del><ins>+    HTML_BEGIN_STATE(ScriptDataEscapedEndTagOpenState) {
+        if (isASCIIUpper(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(toLowerCase(cc)));
+            HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+        } else if (isASCIILower(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(cc));
+            HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+        } else {
+            bufferASCIICharacter('&lt;');
+            bufferASCIICharacter('/');
+            HTML_RECONSUME_IN(ScriptDataEscapedState);
</ins><span class="cx">         }
</span><del>-        bufferASCIICharacter('&lt;');
-        bufferASCIICharacter('/');
-        RECONSUME_IN(ScriptDataEscapedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataEscapedEndTagNameState)
-        if (isASCIIAlpha(character)) {
-            appendToTemporaryBuffer(character);
-            appendToPossibleEndTag(convertASCIIAlphaToLower(character));
-            ADVANCE_TO(ScriptDataEscapedEndTagNameState);
-        }
-        if (isTokenizerWhitespace(character)) {
-            if (isAppropriateEndTag()) {
-                if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
-                    return true;
-                SWITCH_TO(BeforeAttributeNameState);
</del><ins>+    HTML_BEGIN_STATE(ScriptDataEscapedEndTagNameState) {
+        if (isASCIIUpper(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(toLowerCase(cc)));
+            HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+        } else if (isASCIILower(cc)) {
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            addToPossibleEndTag(static_cast&lt;LChar&gt;(cc));
+            HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+        } else {
+            if (isTokenizerWhitespace(cc)) {
+                if (isAppropriateEndTag()) {
+                    m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+                    FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+                }
+            } else if (cc == '/') {
+                if (isAppropriateEndTag()) {
+                    m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+                    FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+                }
+            } else if (cc == '&gt;') {
+                if (isAppropriateEndTag()) {
+                    m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+                    return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
+                }
</ins><span class="cx">             }
</span><del>-        } else if (character == '/') {
-            if (isAppropriateEndTag()) {
-                if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
-                    return true;
-                SWITCH_TO(SelfClosingStartTagState);
-            }
-        } else if (character == '&gt;') {
-            if (isAppropriateEndTag())
-                return commitToCompleteEndTag(source);
</del><ins>+            bufferASCIICharacter('&lt;');
+            bufferASCIICharacter('/');
+            m_token-&gt;appendToCharacter(m_temporaryBuffer);
+            m_bufferedEndTagName.clear();
+            m_temporaryBuffer.clear();
+            HTML_RECONSUME_IN(ScriptDataEscapedState);
</ins><span class="cx">         }
</span><del>-        bufferASCIICharacter('&lt;');
-        bufferASCIICharacter('/');
-        m_token.appendToCharacter(m_temporaryBuffer);
-        m_bufferedEndTagName.clear();
-        m_temporaryBuffer.clear();
-        RECONSUME_IN(ScriptDataEscapedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataDoubleEscapeStartState)
-        if (isTokenizerWhitespace(character) || character == '/' || character == '&gt;') {
-            bufferASCIICharacter(character);
-            if (temporaryBufferIs(&quot;script&quot;))
-                ADVANCE_TO(ScriptDataDoubleEscapedState);
</del><ins>+    HTML_BEGIN_STATE(ScriptDataDoubleEscapeStartState) {
+        if (isTokenizerWhitespace(cc) || cc == '/' || cc == '&gt;') {
+            bufferASCIICharacter(cc);
+            if (temporaryBufferIs(scriptTag.localName()))
+                HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
</ins><span class="cx">             else
</span><del>-                ADVANCE_TO(ScriptDataEscapedState);
-        }
-        if (isASCIIAlpha(character)) {
-            bufferASCIICharacter(character);
-            appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
-            ADVANCE_TO(ScriptDataDoubleEscapeStartState);
-        }
-        RECONSUME_IN(ScriptDataEscapedState);
</del><ins>+                HTML_ADVANCE_TO(ScriptDataEscapedState);
+        } else if (isASCIIUpper(cc)) {
+            bufferASCIICharacter(cc);
+            m_temporaryBuffer.append(toLowerCase(cc));
+            HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+        } else if (isASCIILower(cc)) {
+            bufferASCIICharacter(cc);
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+        } else
+            HTML_RECONSUME_IN(ScriptDataEscapedState);
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataDoubleEscapedState)
-        if (character == '-') {
</del><ins>+    HTML_BEGIN_STATE(ScriptDataDoubleEscapedState) {
+        if (cc == '-') {
</ins><span class="cx">             bufferASCIICharacter('-');
</span><del>-            ADVANCE_TO(ScriptDataDoubleEscapedDashState);
-        }
-        if (character == '&lt;') {
</del><ins>+            HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashState);
+        } else if (cc == '&lt;') {
</ins><span class="cx">             bufferASCIICharacter('&lt;');
</span><del>-            ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            HTML_RECONSUME_IN(DataState);
+        } else {
+            bufferCharacter(cc);
+            HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
</ins><span class="cx">         }
</span><del>-        bufferCharacter(character);
-        ADVANCE_TO(ScriptDataDoubleEscapedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataDoubleEscapedDashState)
-        if (character == '-') {
</del><ins>+    HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashState) {
+        if (cc == '-') {
</ins><span class="cx">             bufferASCIICharacter('-');
</span><del>-            ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
-        }
-        if (character == '&lt;') {
</del><ins>+            HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
+        } else if (cc == '&lt;') {
</ins><span class="cx">             bufferASCIICharacter('&lt;');
</span><del>-            ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            HTML_RECONSUME_IN(DataState);
+        } else {
+            bufferCharacter(cc);
+            HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
</ins><span class="cx">         }
</span><del>-        bufferCharacter(character);
-        ADVANCE_TO(ScriptDataDoubleEscapedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataDoubleEscapedDashDashState)
-        if (character == '-') {
</del><ins>+    HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) {
+        if (cc == '-') {
</ins><span class="cx">             bufferASCIICharacter('-');
</span><del>-            ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
-        }
-        if (character == '&lt;') {
</del><ins>+            HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
+        } else if (cc == '&lt;') {
</ins><span class="cx">             bufferASCIICharacter('&lt;');
</span><del>-            ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
-        }
-        if (character == '&gt;') {
</del><ins>+            HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
+        } else if (cc == '&gt;') {
</ins><span class="cx">             bufferASCIICharacter('&gt;');
</span><del>-            ADVANCE_TO(ScriptDataState);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            HTML_ADVANCE_TO(ScriptDataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            HTML_RECONSUME_IN(DataState);
+        } else {
+            bufferCharacter(cc);
+            HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
</ins><span class="cx">         }
</span><del>-        bufferCharacter(character);
-        ADVANCE_TO(ScriptDataDoubleEscapedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState)
-        if (character == '/') {
</del><ins>+    HTML_BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) {
+        if (cc == '/') {
</ins><span class="cx">             bufferASCIICharacter('/');
</span><span class="cx">             m_temporaryBuffer.clear();
</span><del>-            ADVANCE_TO(ScriptDataDoubleEscapeEndState);
-        }
-        RECONSUME_IN(ScriptDataDoubleEscapedState);
</del><ins>+            HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
+        } else
+            HTML_RECONSUME_IN(ScriptDataDoubleEscapedState);
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ScriptDataDoubleEscapeEndState)
-        if (isTokenizerWhitespace(character) || character == '/' || character == '&gt;') {
-            bufferASCIICharacter(character);
-            if (temporaryBufferIs(&quot;script&quot;))
-                ADVANCE_TO(ScriptDataEscapedState);
</del><ins>+    HTML_BEGIN_STATE(ScriptDataDoubleEscapeEndState) {
+        if (isTokenizerWhitespace(cc) || cc == '/' || cc == '&gt;') {
+            bufferASCIICharacter(cc);
+            if (temporaryBufferIs(scriptTag.localName()))
+                HTML_ADVANCE_TO(ScriptDataEscapedState);
</ins><span class="cx">             else
</span><del>-                ADVANCE_TO(ScriptDataDoubleEscapedState);
-        }
-        if (isASCIIAlpha(character)) {
-            bufferASCIICharacter(character);
-            appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
-            ADVANCE_TO(ScriptDataDoubleEscapeEndState);
-        }
-        RECONSUME_IN(ScriptDataDoubleEscapedState);
</del><ins>+                HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
+        } else if (isASCIIUpper(cc)) {
+            bufferASCIICharacter(cc);
+            m_temporaryBuffer.append(toLowerCase(cc));
+            HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
+        } else if (isASCIILower(cc)) {
+            bufferASCIICharacter(cc);
+            m_temporaryBuffer.append(static_cast&lt;LChar&gt;(cc));
+            HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
+        } else
+            HTML_RECONSUME_IN(ScriptDataDoubleEscapedState);
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(BeforeAttributeNameState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(BeforeAttributeNameState);
-        if (character == '/')
-            ADVANCE_TO(SelfClosingStartTagState);
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (m_options.usePreHTML5ParserQuirks &amp;&amp; character == '&lt;')
-            return emitAndReconsumeInDataState();
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(BeforeAttributeNameState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(BeforeAttributeNameState);
+        else if (cc == '/')
+            HTML_ADVANCE_TO(SelfClosingStartTagState);
+        else if (cc == '&gt;')
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        else if (m_options.usePreHTML5ParserQuirks &amp;&amp; cc == '&lt;')
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        else if (isASCIIUpper(cc)) {
+            m_token-&gt;addNewAttribute();
+            m_token-&gt;beginAttributeName(source.numberOfCharactersConsumed());
+            m_token-&gt;appendToAttributeName(toLowerCase(cc));
+            HTML_ADVANCE_TO(AttributeNameState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            HTML_RECONSUME_IN(DataState);
+        } else {
+            if (cc == '&quot;' || cc == '\'' || cc == '&lt;' || cc == '=')
+                parseError();
+            m_token-&gt;addNewAttribute();
+            m_token-&gt;beginAttributeName(source.numberOfCharactersConsumed());
+            m_token-&gt;appendToAttributeName(cc);
+            HTML_ADVANCE_TO(AttributeNameState);
</ins><span class="cx">         }
</span><del>-        if (character == '&quot;' || character == '\'' || character == '&lt;' || character == '=')
-            parseError();
-        m_token.beginAttribute(source.numberOfCharactersConsumed());
-        m_token.appendToAttributeName(toASCIILower(character));
-        ADVANCE_TO(AttributeNameState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(AttributeNameState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(AfterAttributeNameState);
-        if (character == '/')
-            ADVANCE_TO(SelfClosingStartTagState);
-        if (character == '=')
-            ADVANCE_TO(BeforeAttributeValueState);
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (m_options.usePreHTML5ParserQuirks &amp;&amp; character == '&lt;')
-            return emitAndReconsumeInDataState();
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(AttributeNameState) {
+        if (isTokenizerWhitespace(cc)) {
+            m_token-&gt;endAttributeName(source.numberOfCharactersConsumed());
+            HTML_ADVANCE_TO(AfterAttributeNameState);
+        } else if (cc == '/') {
+            m_token-&gt;endAttributeName(source.numberOfCharactersConsumed());
+            HTML_ADVANCE_TO(SelfClosingStartTagState);
+        } else if (cc == '=') {
+            m_token-&gt;endAttributeName(source.numberOfCharactersConsumed());
+            HTML_ADVANCE_TO(BeforeAttributeValueState);
+        } else if (cc == '&gt;') {
+            m_token-&gt;endAttributeName(source.numberOfCharactersConsumed());
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (m_options.usePreHTML5ParserQuirks &amp;&amp; cc == '&lt;') {
+            m_token-&gt;endAttributeName(source.numberOfCharactersConsumed());
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else if (isASCIIUpper(cc)) {
+            m_token-&gt;appendToAttributeName(toLowerCase(cc));
+            HTML_ADVANCE_TO(AttributeNameState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            m_token-&gt;endAttributeName(source.numberOfCharactersConsumed());
+            HTML_RECONSUME_IN(DataState);
+        } else {
+            if (cc == '&quot;' || cc == '\'' || cc == '&lt;' || cc == '=')
+                parseError();
+            m_token-&gt;appendToAttributeName(cc);
+            HTML_ADVANCE_TO(AttributeNameState);
</ins><span class="cx">         }
</span><del>-        if (character == '&quot;' || character == '\'' || character == '&lt;' || character == '=')
-            parseError();
-        m_token.appendToAttributeName(toASCIILower(character));
-        ADVANCE_TO(AttributeNameState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(AfterAttributeNameState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(AfterAttributeNameState);
-        if (character == '/')
-            ADVANCE_TO(SelfClosingStartTagState);
-        if (character == '=')
-            ADVANCE_TO(BeforeAttributeValueState);
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (m_options.usePreHTML5ParserQuirks &amp;&amp; character == '&lt;')
-            return emitAndReconsumeInDataState();
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(AfterAttributeNameState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(AfterAttributeNameState);
+        else if (cc == '/')
+            HTML_ADVANCE_TO(SelfClosingStartTagState);
+        else if (cc == '=')
+            HTML_ADVANCE_TO(BeforeAttributeValueState);
+        else if (cc == '&gt;')
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        else if (m_options.usePreHTML5ParserQuirks &amp;&amp; cc == '&lt;')
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        else if (isASCIIUpper(cc)) {
+            m_token-&gt;addNewAttribute();
+            m_token-&gt;beginAttributeName(source.numberOfCharactersConsumed());
+            m_token-&gt;appendToAttributeName(toLowerCase(cc));
+            HTML_ADVANCE_TO(AttributeNameState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            HTML_RECONSUME_IN(DataState);
+        } else {
+            if (cc == '&quot;' || cc == '\'' || cc == '&lt;')
+                parseError();
+            m_token-&gt;addNewAttribute();
+            m_token-&gt;beginAttributeName(source.numberOfCharactersConsumed());
+            m_token-&gt;appendToAttributeName(cc);
+            HTML_ADVANCE_TO(AttributeNameState);
</ins><span class="cx">         }
</span><del>-        if (character == '&quot;' || character == '\'' || character == '&lt;')
-            parseError();
-        m_token.beginAttribute(source.numberOfCharactersConsumed());
-        m_token.appendToAttributeName(toASCIILower(character));
-        ADVANCE_TO(AttributeNameState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(BeforeAttributeValueState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(BeforeAttributeValueState);
-        if (character == '&quot;')
-            ADVANCE_TO(AttributeValueDoubleQuotedState);
-        if (character == '&amp;')
-            RECONSUME_IN(AttributeValueUnquotedState);
-        if (character == '\'')
-            ADVANCE_TO(AttributeValueSingleQuotedState);
-        if (character == '&gt;') {
</del><ins>+    HTML_BEGIN_STATE(BeforeAttributeValueState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(BeforeAttributeValueState);
+        else if (cc == '&quot;') {
+            m_token-&gt;beginAttributeValue(source.numberOfCharactersConsumed() + 1);
+            HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
+        } else if (cc == '&amp;') {
+            m_token-&gt;beginAttributeValue(source.numberOfCharactersConsumed());
+            HTML_RECONSUME_IN(AttributeValueUnquotedState);
+        } else if (cc == '\'') {
+            m_token-&gt;beginAttributeValue(source.numberOfCharactersConsumed() + 1);
+            HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
+        } else if (cc == '&gt;') {
</ins><span class="cx">             parseError();
</span><del>-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            HTML_RECONSUME_IN(DataState);
+        } else {
+            if (cc == '&lt;' || cc == '=' || cc == '`')
+                parseError();
+            m_token-&gt;beginAttributeValue(source.numberOfCharactersConsumed());
+            m_token-&gt;appendToAttributeValue(cc);
+            HTML_ADVANCE_TO(AttributeValueUnquotedState);
</ins><span class="cx">         }
</span><del>-        if (character == '&lt;' || character == '=' || character == '`')
-            parseError();
-        m_token.appendToAttributeValue(character);
-        ADVANCE_TO(AttributeValueUnquotedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(AttributeValueDoubleQuotedState)
-        if (character == '&quot;') {
-            m_token.endAttribute(source.numberOfCharactersConsumed());
-            ADVANCE_TO(AfterAttributeValueQuotedState);
-        }
-        if (character == '&amp;') {
</del><ins>+    HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) {
+        if (cc == '&quot;') {
+            m_token-&gt;endAttributeValue(source.numberOfCharactersConsumed());
+            HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
+        } else if (cc == '&amp;') {
</ins><span class="cx">             m_additionalAllowedCharacter = '&quot;';
</span><del>-            ADVANCE_TO(CharacterReferenceInAttributeValueState);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.endAttribute(source.numberOfCharactersConsumed());
-            RECONSUME_IN(DataState);
</del><ins>+            m_token-&gt;endAttributeValue(source.numberOfCharactersConsumed());
+            HTML_RECONSUME_IN(DataState);
+        } else {
+            m_token-&gt;appendToAttributeValue(cc);
+            HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
</ins><span class="cx">         }
</span><del>-        m_token.appendToAttributeValue(character);
-        ADVANCE_TO(AttributeValueDoubleQuotedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(AttributeValueSingleQuotedState)
-        if (character == '\'') {
-            m_token.endAttribute(source.numberOfCharactersConsumed());
-            ADVANCE_TO(AfterAttributeValueQuotedState);
-        }
-        if (character == '&amp;') {
</del><ins>+    HTML_BEGIN_STATE(AttributeValueSingleQuotedState) {
+        if (cc == '\'') {
+            m_token-&gt;endAttributeValue(source.numberOfCharactersConsumed());
+            HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
+        } else if (cc == '&amp;') {
</ins><span class="cx">             m_additionalAllowedCharacter = '\'';
</span><del>-            ADVANCE_TO(CharacterReferenceInAttributeValueState);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.endAttribute(source.numberOfCharactersConsumed());
-            RECONSUME_IN(DataState);
</del><ins>+            m_token-&gt;endAttributeValue(source.numberOfCharactersConsumed());
+            HTML_RECONSUME_IN(DataState);
+        } else {
+            m_token-&gt;appendToAttributeValue(cc);
+            HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
</ins><span class="cx">         }
</span><del>-        m_token.appendToAttributeValue(character);
-        ADVANCE_TO(AttributeValueSingleQuotedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(AttributeValueUnquotedState)
-        if (isTokenizerWhitespace(character)) {
-            m_token.endAttribute(source.numberOfCharactersConsumed());
-            ADVANCE_TO(BeforeAttributeNameState);
-        }
-        if (character == '&amp;') {
</del><ins>+    HTML_BEGIN_STATE(AttributeValueUnquotedState) {
+        if (isTokenizerWhitespace(cc)) {
+            m_token-&gt;endAttributeValue(source.numberOfCharactersConsumed());
+            HTML_ADVANCE_TO(BeforeAttributeNameState);
+        } else if (cc == '&amp;') {
</ins><span class="cx">             m_additionalAllowedCharacter = '&gt;';
</span><del>-            ADVANCE_TO(CharacterReferenceInAttributeValueState);
-        }
-        if (character == '&gt;') {
-            m_token.endAttribute(source.numberOfCharactersConsumed());
-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
+        } else if (cc == '&gt;') {
+            m_token-&gt;endAttributeValue(source.numberOfCharactersConsumed());
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.endAttribute(source.numberOfCharactersConsumed());
-            RECONSUME_IN(DataState);
</del><ins>+            m_token-&gt;endAttributeValue(source.numberOfCharactersConsumed());
+            HTML_RECONSUME_IN(DataState);
+        } else {
+            if (cc == '&quot;' || cc == '\'' || cc == '&lt;' || cc == '=' || cc == '`')
+                parseError();
+            m_token-&gt;appendToAttributeValue(cc);
+            HTML_ADVANCE_TO(AttributeValueUnquotedState);
</ins><span class="cx">         }
</span><del>-        if (character == '&quot;' || character == '\'' || character == '&lt;' || character == '=' || character == '`')
-            parseError();
-        m_token.appendToAttributeValue(character);
-        ADVANCE_TO(AttributeValueUnquotedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(CharacterReferenceInAttributeValueState)
</del><ins>+    HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) {
</ins><span class="cx">         bool notEnoughCharacters = false;
</span><span class="cx">         StringBuilder decodedEntity;
</span><span class="cx">         bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter);
</span><span class="cx">         if (notEnoughCharacters)
</span><del>-            RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
</del><ins>+            return haveBufferedCharacterToken();
</ins><span class="cx">         if (!success) {
</span><span class="cx">             ASSERT(decodedEntity.isEmpty());
</span><del>-            m_token.appendToAttributeValue('&amp;');
</del><ins>+            m_token-&gt;appendToAttributeValue('&amp;');
</ins><span class="cx">         } else {
</span><span class="cx">             for (unsigned i = 0; i &lt; decodedEntity.length(); ++i)
</span><del>-                m_token.appendToAttributeValue(decodedEntity[i]);
</del><ins>+                m_token-&gt;appendToAttributeValue(decodedEntity[i]);
</ins><span class="cx">         }
</span><span class="cx">         // We're supposed to switch back to the attribute value state that
</span><span class="cx">         // we were in when we were switched into this state. Rather than
</span><span class="cx">         // keeping track of this explictly, we observe that the previous
</span><span class="cx">         // state can be determined by m_additionalAllowedCharacter.
</span><span class="cx">         if (m_additionalAllowedCharacter == '&quot;')
</span><del>-            SWITCH_TO(AttributeValueDoubleQuotedState);
-        if (m_additionalAllowedCharacter == '\'')
-            SWITCH_TO(AttributeValueSingleQuotedState);
-        ASSERT(m_additionalAllowedCharacter == '&gt;');
-        SWITCH_TO(AttributeValueUnquotedState);
</del><ins>+            HTML_SWITCH_TO(AttributeValueDoubleQuotedState);
+        else if (m_additionalAllowedCharacter == '\'')
+            HTML_SWITCH_TO(AttributeValueSingleQuotedState);
+        else if (m_additionalAllowedCharacter == '&gt;')
+            HTML_SWITCH_TO(AttributeValueUnquotedState);
+        else
+            ASSERT_NOT_REACHED();
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(AfterAttributeValueQuotedState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(BeforeAttributeNameState);
-        if (character == '/')
-            ADVANCE_TO(SelfClosingStartTagState);
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (m_options.usePreHTML5ParserQuirks &amp;&amp; character == '&lt;')
-            return emitAndReconsumeInDataState();
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(BeforeAttributeNameState);
+        else if (cc == '/')
+            HTML_ADVANCE_TO(SelfClosingStartTagState);
+        else if (cc == '&gt;')
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        else if (m_options.usePreHTML5ParserQuirks &amp;&amp; cc == '&lt;')
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            HTML_RECONSUME_IN(DataState);
+        } else {
+            parseError();
+            HTML_RECONSUME_IN(BeforeAttributeNameState);
</ins><span class="cx">         }
</span><del>-        parseError();
-        RECONSUME_IN(BeforeAttributeNameState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(SelfClosingStartTagState)
-        if (character == '&gt;') {
-            m_token.setSelfClosing();
-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(SelfClosingStartTagState) {
+        if (cc == '&gt;') {
+            m_token-&gt;setSelfClosing();
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            RECONSUME_IN(DataState);
</del><ins>+            HTML_RECONSUME_IN(DataState);
+        } else {
+            parseError();
+            HTML_RECONSUME_IN(BeforeAttributeNameState);
</ins><span class="cx">         }
</span><del>-        parseError();
-        RECONSUME_IN(BeforeAttributeNameState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(BogusCommentState)
-        m_token.beginComment();
-        RECONSUME_IN(ContinueBogusCommentState);
</del><ins>+    HTML_BEGIN_STATE(BogusCommentState) {
+        m_token-&gt;beginComment();
+        HTML_RECONSUME_IN(ContinueBogusCommentState);
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(ContinueBogusCommentState)
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (character == kEndOfFileMarker)
-            return emitAndReconsumeInDataState();
-        m_token.appendToComment(character);
-        ADVANCE_TO(ContinueBogusCommentState);
</del><ins>+    HTML_BEGIN_STATE(ContinueBogusCommentState) {
+        if (cc == '&gt;')
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        else if (cc == kEndOfFileMarker)
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        else {
+            m_token-&gt;appendToComment(cc);
+            HTML_ADVANCE_TO(ContinueBogusCommentState);
+        }
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(MarkupDeclarationOpenState)
-        if (character == '-') {
-            auto result = source.advancePast(&quot;--&quot;);
</del><ins>+    HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
+        DEPRECATED_DEFINE_STATIC_LOCAL(String, dashDashString, (ASCIILiteral(&quot;--&quot;)));
+        DEPRECATED_DEFINE_STATIC_LOCAL(String, doctypeString, (ASCIILiteral(&quot;doctype&quot;)));
+        DEPRECATED_DEFINE_STATIC_LOCAL(String, cdataString, (ASCIILiteral(&quot;[CDATA[&quot;)));
+        if (cc == '-') {
+            SegmentedString::LookAheadResult result = source.lookAhead(dashDashString);
</ins><span class="cx">             if (result == SegmentedString::DidMatch) {
</span><del>-                m_token.beginComment();
-                SWITCH_TO(CommentStartState);
-            }
-            if (result == SegmentedString::NotEnoughCharacters)
-                RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
-        } else if (isASCIIAlphaCaselessEqual(character, 'd')) {
-            auto result = source.advancePastIgnoringCase(&quot;doctype&quot;);
-            if (result == SegmentedString::DidMatch)
-                SWITCH_TO(DOCTYPEState);
-            if (result == SegmentedString::NotEnoughCharacters)
-                RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
-        } else if (character == '[' &amp;&amp; shouldAllowCDATA()) {
-            auto result = source.advancePast(&quot;[CDATA[&quot;);
-            if (result == SegmentedString::DidMatch)
-                SWITCH_TO(CDATASectionState);
-            if (result == SegmentedString::NotEnoughCharacters)
-                RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
</del><ins>+                source.advanceAndASSERT('-');
+                source.advanceAndASSERT('-');
+                m_token-&gt;beginComment();
+                HTML_SWITCH_TO(CommentStartState);
+            } else if (result == SegmentedString::NotEnoughCharacters)
+                return haveBufferedCharacterToken();
+        } else if (cc == 'D' || cc == 'd') {
+            SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString);
+            if (result == SegmentedString::DidMatch) {
+                advanceStringAndASSERTIgnoringCase(source, &quot;doctype&quot;);
+                HTML_SWITCH_TO(DOCTYPEState);
+            } else if (result == SegmentedString::NotEnoughCharacters)
+                return haveBufferedCharacterToken();
+        } else if (cc == '[' &amp;&amp; shouldAllowCDATA()) {
+            SegmentedString::LookAheadResult result = source.lookAhead(cdataString);
+            if (result == SegmentedString::DidMatch) {
+                advanceStringAndASSERT(source, &quot;[CDATA[&quot;);
+                HTML_SWITCH_TO(CDATASectionState);
+            } else if (result == SegmentedString::NotEnoughCharacters)
+                return haveBufferedCharacterToken();
</ins><span class="cx">         }
</span><span class="cx">         parseError();
</span><del>-        RECONSUME_IN(BogusCommentState);
</del><ins>+        HTML_RECONSUME_IN(BogusCommentState);
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(CommentStartState)
-        if (character == '-')
-            ADVANCE_TO(CommentStartDashState);
-        if (character == '&gt;') {
</del><ins>+    HTML_BEGIN_STATE(CommentStartState) {
+        if (cc == '-')
+            HTML_ADVANCE_TO(CommentStartDashState);
+        else if (cc == '&gt;') {
</ins><span class="cx">             parseError();
</span><del>-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            return emitAndReconsumeInDataState();
</del><ins>+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            m_token-&gt;appendToComment(cc);
+            HTML_ADVANCE_TO(CommentState);
</ins><span class="cx">         }
</span><del>-        m_token.appendToComment(character);
-        ADVANCE_TO(CommentState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(CommentStartDashState)
-        if (character == '-')
-            ADVANCE_TO(CommentEndState);
-        if (character == '&gt;') {
</del><ins>+    HTML_BEGIN_STATE(CommentStartDashState) {
+        if (cc == '-')
+            HTML_ADVANCE_TO(CommentEndState);
+        else if (cc == '&gt;') {
</ins><span class="cx">             parseError();
</span><del>-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            return emitAndReconsumeInDataState();
</del><ins>+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            m_token-&gt;appendToComment('-');
+            m_token-&gt;appendToComment(cc);
+            HTML_ADVANCE_TO(CommentState);
</ins><span class="cx">         }
</span><del>-        m_token.appendToComment('-');
-        m_token.appendToComment(character);
-        ADVANCE_TO(CommentState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(CommentState)
-        if (character == '-')
-            ADVANCE_TO(CommentEndDashState);
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(CommentState) {
+        if (cc == '-')
+            HTML_ADVANCE_TO(CommentEndDashState);
+        else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            return emitAndReconsumeInDataState();
</del><ins>+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            m_token-&gt;appendToComment(cc);
+            HTML_ADVANCE_TO(CommentState);
</ins><span class="cx">         }
</span><del>-        m_token.appendToComment(character);
-        ADVANCE_TO(CommentState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(CommentEndDashState)
-        if (character == '-')
-            ADVANCE_TO(CommentEndState);
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(CommentEndDashState) {
+        if (cc == '-')
+            HTML_ADVANCE_TO(CommentEndState);
+        else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            return emitAndReconsumeInDataState();
</del><ins>+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            m_token-&gt;appendToComment('-');
+            m_token-&gt;appendToComment(cc);
+            HTML_ADVANCE_TO(CommentState);
</ins><span class="cx">         }
</span><del>-        m_token.appendToComment('-');
-        m_token.appendToComment(character);
-        ADVANCE_TO(CommentState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(CommentEndState)
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (character == '!') {
</del><ins>+    HTML_BEGIN_STATE(CommentEndState) {
+        if (cc == '&gt;')
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        else if (cc == '!') {
</ins><span class="cx">             parseError();
</span><del>-            ADVANCE_TO(CommentEndBangState);
-        }
-        if (character == '-') {
</del><ins>+            HTML_ADVANCE_TO(CommentEndBangState);
+        } else if (cc == '-') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.appendToComment('-');
-            ADVANCE_TO(CommentEndState);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            m_token-&gt;appendToComment('-');
+            HTML_ADVANCE_TO(CommentEndState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            return emitAndReconsumeInDataState();
</del><ins>+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            parseError();
+            m_token-&gt;appendToComment('-');
+            m_token-&gt;appendToComment('-');
+            m_token-&gt;appendToComment(cc);
+            HTML_ADVANCE_TO(CommentState);
</ins><span class="cx">         }
</span><del>-        parseError();
-        m_token.appendToComment('-');
-        m_token.appendToComment('-');
-        m_token.appendToComment(character);
-        ADVANCE_TO(CommentState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(CommentEndBangState)
-        if (character == '-') {
-            m_token.appendToComment('-');
-            m_token.appendToComment('-');
-            m_token.appendToComment('!');
-            ADVANCE_TO(CommentEndDashState);
-        }
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(CommentEndBangState) {
+        if (cc == '-') {
+            m_token-&gt;appendToComment('-');
+            m_token-&gt;appendToComment('-');
+            m_token-&gt;appendToComment('!');
+            HTML_ADVANCE_TO(CommentEndDashState);
+        } else if (cc == '&gt;')
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            return emitAndReconsumeInDataState();
</del><ins>+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            m_token-&gt;appendToComment('-');
+            m_token-&gt;appendToComment('-');
+            m_token-&gt;appendToComment('!');
+            m_token-&gt;appendToComment(cc);
+            HTML_ADVANCE_TO(CommentState);
</ins><span class="cx">         }
</span><del>-        m_token.appendToComment('-');
-        m_token.appendToComment('-');
-        m_token.appendToComment('!');
-        m_token.appendToComment(character);
-        ADVANCE_TO(CommentState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(DOCTYPEState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(BeforeDOCTYPENameState);
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(DOCTYPEState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(BeforeDOCTYPENameState);
+        else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.beginDOCTYPE();
-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;beginDOCTYPE();
+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            parseError();
+            HTML_RECONSUME_IN(BeforeDOCTYPENameState);
</ins><span class="cx">         }
</span><del>-        parseError();
-        RECONSUME_IN(BeforeDOCTYPENameState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(BeforeDOCTYPENameState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(BeforeDOCTYPENameState);
-        if (character == '&gt;') {
</del><ins>+    HTML_BEGIN_STATE(BeforeDOCTYPENameState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(BeforeDOCTYPENameState);
+        else if (isASCIIUpper(cc)) {
+            m_token-&gt;beginDOCTYPE(toLowerCase(cc));
+            HTML_ADVANCE_TO(DOCTYPENameState);
+        } else if (cc == '&gt;') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.beginDOCTYPE();
-            m_token.setForceQuirks();
-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            m_token-&gt;beginDOCTYPE();
+            m_token-&gt;setForceQuirks();
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.beginDOCTYPE();
-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;beginDOCTYPE();
+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            m_token-&gt;beginDOCTYPE(cc);
+            HTML_ADVANCE_TO(DOCTYPENameState);
</ins><span class="cx">         }
</span><del>-        m_token.beginDOCTYPE(toASCIILower(character));
-        ADVANCE_TO(DOCTYPENameState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(DOCTYPENameState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(AfterDOCTYPENameState);
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(DOCTYPENameState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(AfterDOCTYPENameState);
+        else if (cc == '&gt;')
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        else if (isASCIIUpper(cc)) {
+            m_token-&gt;appendToName(toLowerCase(cc));
+            HTML_ADVANCE_TO(DOCTYPENameState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            m_token-&gt;appendToName(cc);
+            HTML_ADVANCE_TO(DOCTYPENameState);
</ins><span class="cx">         }
</span><del>-        m_token.appendToName(toASCIILower(character));
-        ADVANCE_TO(DOCTYPENameState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(AfterDOCTYPENameState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(AfterDOCTYPENameState);
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(AfterDOCTYPENameState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(AfterDOCTYPENameState);
+        if (cc == '&gt;')
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            DEPRECATED_DEFINE_STATIC_LOCAL(String, publicString, (ASCIILiteral(&quot;public&quot;)));
+            DEPRECATED_DEFINE_STATIC_LOCAL(String, systemString, (ASCIILiteral(&quot;system&quot;)));
+            if (cc == 'P' || cc == 'p') {
+                SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString);
+                if (result == SegmentedString::DidMatch) {
+                    advanceStringAndASSERTIgnoringCase(source, &quot;public&quot;);
+                    HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState);
+                } else if (result == SegmentedString::NotEnoughCharacters)
+                    return haveBufferedCharacterToken();
+            } else if (cc == 'S' || cc == 's') {
+                SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString);
+                if (result == SegmentedString::DidMatch) {
+                    advanceStringAndASSERTIgnoringCase(source, &quot;system&quot;);
+                    HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState);
+                } else if (result == SegmentedString::NotEnoughCharacters)
+                    return haveBufferedCharacterToken();
+            }
+            parseError();
+            m_token-&gt;setForceQuirks();
+            HTML_ADVANCE_TO(BogusDOCTYPEState);
</ins><span class="cx">         }
</span><del>-        if (isASCIIAlphaCaselessEqual(character, 'p')) {
-            auto result = source.advancePastIgnoringCase(&quot;public&quot;);
-            if (result == SegmentedString::DidMatch)
-                SWITCH_TO(AfterDOCTYPEPublicKeywordState);
-            if (result == SegmentedString::NotEnoughCharacters)
-                RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
-        } else if (isASCIIAlphaCaselessEqual(character, 's')) {
-            auto result = source.advancePastIgnoringCase(&quot;system&quot;);
-            if (result == SegmentedString::DidMatch)
-                SWITCH_TO(AfterDOCTYPESystemKeywordState);
-            if (result == SegmentedString::NotEnoughCharacters)
-                RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
-        }
-        parseError();
-        m_token.setForceQuirks();
-        ADVANCE_TO(BogusDOCTYPEState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(AfterDOCTYPEPublicKeywordState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
-        if (character == '&quot;') {
</del><ins>+    HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
+        else if (cc == '&quot;') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setPublicIdentifierToEmptyString();
-            ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
-        }
-        if (character == '\'') {
</del><ins>+            m_token-&gt;setPublicIdentifierToEmptyString();
+            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+        } else if (cc == '\'') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setPublicIdentifierToEmptyString();
-            ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
-        }
-        if (character == '&gt;') {
</del><ins>+            m_token-&gt;setPublicIdentifierToEmptyString();
+            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+        } else if (cc == '&gt;') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            parseError();
+            m_token-&gt;setForceQuirks();
+            HTML_ADVANCE_TO(BogusDOCTYPEState);
</ins><span class="cx">         }
</span><del>-        parseError();
-        m_token.setForceQuirks();
-        ADVANCE_TO(BogusDOCTYPEState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
-        if (character == '&quot;') {
-            m_token.setPublicIdentifierToEmptyString();
-            ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
-        }
-        if (character == '\'') {
-            m_token.setPublicIdentifierToEmptyString();
-            ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
-        }
-        if (character == '&gt;') {
</del><ins>+    HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
+        else if (cc == '&quot;') {
+            m_token-&gt;setPublicIdentifierToEmptyString();
+            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+        } else if (cc == '\'') {
+            m_token-&gt;setPublicIdentifierToEmptyString();
+            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+        } else if (cc == '&gt;') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            parseError();
+            m_token-&gt;setForceQuirks();
+            HTML_ADVANCE_TO(BogusDOCTYPEState);
</ins><span class="cx">         }
</span><del>-        parseError();
-        m_token.setForceQuirks();
-        ADVANCE_TO(BogusDOCTYPEState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState)
-        if (character == '&quot;')
-            ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
-        if (character == '&gt;') {
</del><ins>+    HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
+        if (cc == '&quot;')
+            HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
+        else if (cc == '&gt;') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            m_token-&gt;appendToPublicIdentifier(cc);
+            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
</ins><span class="cx">         }
</span><del>-        m_token.appendToPublicIdentifier(character);
-        ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState)
-        if (character == '\'')
-            ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
-        if (character == '&gt;') {
</del><ins>+    HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
+        if (cc == '\'')
+            HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
+        else if (cc == '&gt;') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            m_token-&gt;appendToPublicIdentifier(cc);
+            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
</ins><span class="cx">         }
</span><del>-        m_token.appendToPublicIdentifier(character);
-        ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(AfterDOCTYPEPublicIdentifierState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (character == '&quot;') {
</del><ins>+    HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
+        else if (cc == '&gt;')
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        else if (cc == '&quot;') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
-        }
-        if (character == '\'') {
</del><ins>+            m_token-&gt;setSystemIdentifierToEmptyString();
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+        } else if (cc == '\'') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            m_token-&gt;setSystemIdentifierToEmptyString();
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            parseError();
+            m_token-&gt;setForceQuirks();
+            HTML_ADVANCE_TO(BogusDOCTYPEState);
</ins><span class="cx">         }
</span><del>-        parseError();
-        m_token.setForceQuirks();
-        ADVANCE_TO(BogusDOCTYPEState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (character == '&quot;') {
-            m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
-        }
-        if (character == '\'') {
-            m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
+        else if (cc == '&gt;')
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        else if (cc == '&quot;') {
+            m_token-&gt;setSystemIdentifierToEmptyString();
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+        } else if (cc == '\'') {
+            m_token-&gt;setSystemIdentifierToEmptyString();
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            parseError();
+            m_token-&gt;setForceQuirks();
+            HTML_ADVANCE_TO(BogusDOCTYPEState);
</ins><span class="cx">         }
</span><del>-        parseError();
-        m_token.setForceQuirks();
-        ADVANCE_TO(BogusDOCTYPEState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(AfterDOCTYPESystemKeywordState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
-        if (character == '&quot;') {
</del><ins>+    HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
+        else if (cc == '&quot;') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
-        }
-        if (character == '\'') {
</del><ins>+            m_token-&gt;setSystemIdentifierToEmptyString();
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+        } else if (cc == '\'') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
-        }
-        if (character == '&gt;') {
</del><ins>+            m_token-&gt;setSystemIdentifierToEmptyString();
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+        } else if (cc == '&gt;') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            parseError();
+            m_token-&gt;setForceQuirks();
+            HTML_ADVANCE_TO(BogusDOCTYPEState);
</ins><span class="cx">         }
</span><del>-        parseError();
-        m_token.setForceQuirks();
-        ADVANCE_TO(BogusDOCTYPEState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(BeforeDOCTYPESystemIdentifierState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
-        if (character == '&quot;') {
-            m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
-        }
-        if (character == '\'') {
-            m_token.setSystemIdentifierToEmptyString();
-            ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
-        }
-        if (character == '&gt;') {
</del><ins>+    HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
+        if (cc == '&quot;') {
+            m_token-&gt;setSystemIdentifierToEmptyString();
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+        } else if (cc == '\'') {
+            m_token-&gt;setSystemIdentifierToEmptyString();
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+        } else if (cc == '&gt;') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            parseError();
+            m_token-&gt;setForceQuirks();
+            HTML_ADVANCE_TO(BogusDOCTYPEState);
</ins><span class="cx">         }
</span><del>-        parseError();
-        m_token.setForceQuirks();
-        ADVANCE_TO(BogusDOCTYPEState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState)
-        if (character == '&quot;')
-            ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
-        if (character == '&gt;') {
</del><ins>+    HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
+        if (cc == '&quot;')
+            HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+        else if (cc == '&gt;') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            m_token-&gt;appendToSystemIdentifier(cc);
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
</ins><span class="cx">         }
</span><del>-        m_token.appendToSystemIdentifier(character);
-        ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState)
-        if (character == '\'')
-            ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
-        if (character == '&gt;') {
</del><ins>+    HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
+        if (cc == '\'')
+            HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+        else if (cc == '&gt;') {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndResumeInDataState(source);
-        }
-        if (character == kEndOfFileMarker) {
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        } else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            m_token-&gt;appendToSystemIdentifier(cc);
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
</ins><span class="cx">         }
</span><del>-        m_token.appendToSystemIdentifier(character);
-        ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(AfterDOCTYPESystemIdentifierState)
-        if (isTokenizerWhitespace(character))
-            ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (character == kEndOfFileMarker) {
</del><ins>+    HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
+        if (isTokenizerWhitespace(cc))
+            HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+        else if (cc == '&gt;')
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        else if (cc == kEndOfFileMarker) {
</ins><span class="cx">             parseError();
</span><del>-            m_token.setForceQuirks();
-            return emitAndReconsumeInDataState();
</del><ins>+            m_token-&gt;setForceQuirks();
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        } else {
+            parseError();
+            HTML_ADVANCE_TO(BogusDOCTYPEState);
</ins><span class="cx">         }
</span><del>-        parseError();
-        ADVANCE_TO(BogusDOCTYPEState);
</del><ins>+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(BogusDOCTYPEState)
-        if (character == '&gt;')
-            return emitAndResumeInDataState(source);
-        if (character == kEndOfFileMarker)
-            return emitAndReconsumeInDataState();
-        ADVANCE_TO(BogusDOCTYPEState);
</del><ins>+    HTML_BEGIN_STATE(BogusDOCTYPEState) {
+        if (cc == '&gt;')
+            return emitAndResumeIn(source, HTMLTokenizer::DataState);
+        else if (cc == kEndOfFileMarker)
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+        HTML_ADVANCE_TO(BogusDOCTYPEState);
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(CDATASectionState)
-        if (character == ']')
-            ADVANCE_TO(CDATASectionRightSquareBracketState);
-        if (character == kEndOfFileMarker)
-            RECONSUME_IN(DataState);
-        bufferCharacter(character);
-        ADVANCE_TO(CDATASectionState);
</del><ins>+    HTML_BEGIN_STATE(CDATASectionState) {
+        if (cc == ']')
+            HTML_ADVANCE_TO(CDATASectionRightSquareBracketState);
+        else if (cc == kEndOfFileMarker)
+            HTML_RECONSUME_IN(DataState);
+        else {
+            bufferCharacter(cc);
+            HTML_ADVANCE_TO(CDATASectionState);
+        }
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><del>-    BEGIN_STATE(CDATASectionRightSquareBracketState)
-        if (character == ']')
-            ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
-        bufferASCIICharacter(']');
-        RECONSUME_IN(CDATASectionState);
-    END_STATE()
</del><ins>+    HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) {
+        if (cc == ']')
+            HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
+        else {
+            bufferASCIICharacter(']');
+            HTML_RECONSUME_IN(CDATASectionState);
+        }
+    }
</ins><span class="cx"> 
</span><del>-    BEGIN_STATE(CDATASectionDoubleRightSquareBracketState)
-        if (character == '&gt;')
-            ADVANCE_TO(DataState);
-        bufferASCIICharacter(']');
-        bufferASCIICharacter(']');
-        RECONSUME_IN(CDATASectionState);
</del><ins>+    HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
+        if (cc == '&gt;')
+            HTML_ADVANCE_TO(DataState);
+        else {
+            bufferASCIICharacter(']');
+            bufferASCIICharacter(']');
+            HTML_RECONSUME_IN(CDATASectionState);
+        }
+    }
</ins><span class="cx">     END_STATE()
</span><span class="cx"> 
</span><span class="cx">     }
</span><span class="lines">@@ -1409,45 +1561,39 @@
</span><span class="cx"> void HTMLTokenizer::updateStateFor(const AtomicString&amp; tagName)
</span><span class="cx"> {
</span><span class="cx">     if (tagName == textareaTag || tagName == titleTag)
</span><del>-        m_state = RCDATAState;
</del><ins>+        setState(HTMLTokenizer::RCDATAState);
</ins><span class="cx">     else if (tagName == plaintextTag)
</span><del>-        m_state = PLAINTEXTState;
</del><ins>+        setState(HTMLTokenizer::PLAINTEXTState);
</ins><span class="cx">     else if (tagName == scriptTag)
</span><del>-        m_state = ScriptDataState;
</del><ins>+        setState(HTMLTokenizer::ScriptDataState);
</ins><span class="cx">     else if (tagName == styleTag
</span><span class="cx">         || tagName == iframeTag
</span><span class="cx">         || tagName == xmpTag
</span><span class="cx">         || (tagName == noembedTag &amp;&amp; m_options.pluginsEnabled)
</span><span class="cx">         || tagName == noframesTag
</span><span class="cx">         || (tagName == noscriptTag &amp;&amp; m_options.scriptEnabled))
</span><del>-        m_state = RAWTEXTState;
</del><ins>+        setState(HTMLTokenizer::RAWTEXTState);
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-inline void HTMLTokenizer::appendToTemporaryBuffer(UChar character)
</del><ins>+inline bool HTMLTokenizer::temporaryBufferIs(const String&amp; expectedString)
</ins><span class="cx"> {
</span><del>-    ASSERT(isASCII(character));
-    m_temporaryBuffer.append(character);
-}
-
-inline bool HTMLTokenizer::temporaryBufferIs(const char* expectedString)
-{
</del><span class="cx">     return vectorEqualsString(m_temporaryBuffer, expectedString);
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-inline void HTMLTokenizer::appendToPossibleEndTag(UChar character)
</del><ins>+inline void HTMLTokenizer::addToPossibleEndTag(LChar cc)
</ins><span class="cx"> {
</span><del>-    ASSERT(isASCII(character));
-    m_bufferedEndTagName.append(character);
</del><ins>+    ASSERT(isEndTagBufferingState(m_state));
+    m_bufferedEndTagName.append(cc);
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-inline bool HTMLTokenizer::isAppropriateEndTag() const
</del><ins>+inline bool HTMLTokenizer::isAppropriateEndTag()
</ins><span class="cx"> {
</span><span class="cx">     if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size())
</span><span class="cx">         return false;
</span><span class="cx"> 
</span><del>-    unsigned size = m_bufferedEndTagName.size();
</del><ins>+    size_t numCharacters = m_bufferedEndTagName.size();
</ins><span class="cx"> 
</span><del>-    for (unsigned i = 0; i &lt; size; i++) {
</del><ins>+    for (size_t i = 0; i &lt; numCharacters; i++) {
</ins><span class="cx">         if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i])
</span><span class="cx">             return false;
</span><span class="cx">     }
</span><span class="lines">@@ -1457,6 +1603,7 @@
</span><span class="cx"> 
</span><span class="cx"> inline void HTMLTokenizer::parseError()
</span><span class="cx"> {
</span><ins>+    notImplemented();
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> }
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLTokenizerh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLTokenizer.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLTokenizer.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLTokenizer.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -1,5 +1,5 @@
</span><span class="cx"> /*
</span><del>- * Copyright (C) 2008, 2015 Apple Inc. All Rights Reserved.
</del><ins>+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
</ins><span class="cx">  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
</span><span class="cx">  *
</span><span class="cx">  * Redistribution and use in source and binary forms, with or without
</span><span class="lines">@@ -30,54 +30,19 @@
</span><span class="cx"> #include &quot;HTMLParserOptions.h&quot;
</span><span class="cx"> #include &quot;HTMLToken.h&quot;
</span><span class="cx"> #include &quot;InputStreamPreprocessor.h&quot;
</span><ins>+#include &quot;SegmentedString.h&quot;
</ins><span class="cx"> 
</span><span class="cx"> namespace WebCore {
</span><span class="cx"> 
</span><del>-class SegmentedString;
-
</del><span class="cx"> class HTMLTokenizer {
</span><ins>+    WTF_MAKE_NONCOPYABLE(HTMLTokenizer);
+    WTF_MAKE_FAST_ALLOCATED;
</ins><span class="cx"> public:
</span><del>-    explicit HTMLTokenizer(const HTMLParserOptions&amp; = HTMLParserOptions());
</del><ins>+    explicit HTMLTokenizer(const HTMLParserOptions&amp;);
+    ~HTMLTokenizer();
</ins><span class="cx"> 
</span><del>-    // If we can't parse a whole token, this returns null.
-    class TokenPtr;
-    TokenPtr nextToken(SegmentedString&amp;);
</del><ins>+    void reset();
</ins><span class="cx"> 
</span><del>-    // Returns a copy of any characters buffered internally by the tokenizer.
-    // The tokenizer buffers characters when searching for the &lt;/script&gt; token that terminates a script element.
-    String bufferedCharacters() const;
-    size_t numberOfBufferedCharacters() const;
-
-    // Updates the tokenizer's state according to the given tag name. This is an approximation of how the tree
-    // builder would update the tokenizer's state. This method is useful for approximating HTML tokenization.
-    // To get exactly the correct tokenization, you need the real tree builder.
-    //
-    // The main failures in the approximation are as follows:
-    //
-    //  * The first set of character tokens emitted for a &lt;pre&gt; element might contain an extra leading newline.
-    //  * The replacement of U+0000 with U+FFFD will not be sensitive to the tree builder's insertion mode.
-    //  * CDATA sections in foreign content will be tokenized as bogus comments instead of as character tokens.
-    //
-    // This approximation is also the algorithm called for when parsing an HTML fragment.
-    // https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
-    void updateStateFor(const AtomicString&amp; tagName);
-
-    void setForceNullCharacterReplacement(bool);
-
-    bool shouldAllowCDATA() const;
-    void setShouldAllowCDATA(bool);
-
-    bool isInDataState() const;
-
-    void setDataState();
-    void setPLAINTEXTState();
-    void setRAWTEXTState();
-    void setRCDATAState();
-    void setScriptDataState();
-
-    bool neverSkipNullCharacters() const;
-
-private:
</del><span class="cx">     enum State {
</span><span class="cx">         DataState,
</span><span class="cx">         CharacterReferenceInDataState,
</span><span class="lines">@@ -123,7 +88,10 @@
</span><span class="cx">         AfterAttributeValueQuotedState,
</span><span class="cx">         SelfClosingStartTagState,
</span><span class="cx">         BogusCommentState,
</span><del>-        ContinueBogusCommentState, // Not in the HTML spec, used internally to track whether we started the bogus comment token.
</del><ins>+        // The ContinueBogusCommentState is not in the HTML5 spec, but we use
+        // it internally to keep track of whether we've started the bogus
+        // comment token yet.
+        ContinueBogusCommentState,
</ins><span class="cx">         MarkupDeclarationOpenState,
</span><span class="cx">         CommentStartState,
</span><span class="cx">         CommentStartDashState,
</span><span class="lines">@@ -153,198 +121,156 @@
</span><span class="cx">         CDATASectionDoubleRightSquareBracketState,
</span><span class="cx">     };
</span><span class="cx"> 
</span><del>-    bool processToken(SegmentedString&amp;);
-    bool processEntity(SegmentedString&amp;);
</del><ins>+    // This function returns true if it emits a token. Otherwise, callers
+    // must provide the same (in progress) token on the next call (unless
+    // they call reset() first).
+    bool nextToken(SegmentedString&amp;, HTMLToken&amp;);
</ins><span class="cx"> 
</span><del>-    void parseError();
</del><ins>+    // Returns a copy of any characters buffered internally by the tokenizer.
+    // The tokenizer buffers characters when searching for the &lt;/script&gt; token
+    // that terminates a script element.
+    String bufferedCharacters() const;
</ins><span class="cx"> 
</span><del>-    void bufferASCIICharacter(UChar);
-    void bufferCharacter(UChar);
</del><ins>+    size_t numberOfBufferedCharacters() const
+    {
+        // Notice that we add 2 to the length of the m_temporaryBuffer to
+        // account for the &quot;&lt;/&quot; characters, which are effecitvely buffered in
+        // the tokenizer's state machine.
+        return m_temporaryBuffer.size() ? m_temporaryBuffer.size() + 2 : 0;
+    }
</ins><span class="cx"> 
</span><del>-    bool emitAndResumeInDataState(SegmentedString&amp;);
-    bool emitAndReconsumeInDataState();
-    bool emitEndOfFile(SegmentedString&amp;);
</del><ins>+    // Updates the tokenizer's state according to the given tag name. This is
+    // an approximation of how the tree builder would update the tokenizer's
+    // state. This method is useful for approximating HTML tokenization. To
+    // get exactly the correct tokenization, you need the real tree builder.
+    //
+    // The main failures in the approximation are as follows:
+    //
+    //  * The first set of character tokens emitted for a &lt;pre&gt; element might
+    //    contain an extra leading newline.
+    //  * The replacement of U+0000 with U+FFFD will not be sensitive to the
+    //    tree builder's insertion mode.
+    //  * CDATA sections in foreign content will be tokenized as bogus comments
+    //    instead of as character tokens.
+    //
+    void updateStateFor(const AtomicString&amp; tagName);
</ins><span class="cx"> 
</span><del>-    // Return true if we wil emit a character token before dealing with the buffered end tag.
-    void flushBufferedEndTag();
-    bool commitToPartialEndTag(SegmentedString&amp;, UChar, State);
-    bool commitToCompleteEndTag(SegmentedString&amp;);
</del><ins>+    bool forceNullCharacterReplacement() const { return m_forceNullCharacterReplacement; }
+    void setForceNullCharacterReplacement(bool value) { m_forceNullCharacterReplacement = value; }
</ins><span class="cx"> 
</span><del>-    void appendToTemporaryBuffer(UChar);
-    bool temporaryBufferIs(const char*);
</del><ins>+    bool shouldAllowCDATA() const { return m_shouldAllowCDATA; }
+    void setShouldAllowCDATA(bool value) { m_shouldAllowCDATA = value; }
</ins><span class="cx"> 
</span><del>-    // Sometimes we speculatively consume input characters and we don't know whether they represent
-    // end tags or RCDATA, etc. These functions help manage these state.
-    bool inEndTagBufferingState() const;
-    void appendToPossibleEndTag(UChar);
-    void saveEndTagNameIfNeeded();
-    bool isAppropriateEndTag() const;
</del><ins>+    State state() const { return m_state; }
+    void setState(State state) { m_state = state; }
</ins><span class="cx"> 
</span><del>-    bool haveBufferedCharacterToken() const;
</del><ins>+    inline bool shouldSkipNullCharacters() const
+    {
+        return !m_forceNullCharacterReplacement
+            &amp;&amp; (m_state == HTMLTokenizer::DataState
+                || m_state == HTMLTokenizer::RCDATAState
+                || m_state == HTMLTokenizer::RAWTEXTState);
+    }
</ins><span class="cx"> 
</span><del>-    static bool isNullCharacterSkippingState(State);
-
-    State m_state { DataState };
-    bool m_forceNullCharacterReplacement { false };
-    bool m_shouldAllowCDATA { false };
-
-    mutable HTMLToken m_token;
-
-    // https://html.spec.whatwg.org/#additional-allowed-character
-    UChar m_additionalAllowedCharacter { 0 };
-
-    // https://html.spec.whatwg.org/#preprocessing-the-input-stream
-    InputStreamPreprocessor&lt;HTMLTokenizer&gt; m_preprocessor;
-
-    Vector&lt;UChar, 32&gt; m_appropriateEndTagName;
-
-    // https://html.spec.whatwg.org/#temporary-buffer
-    Vector&lt;LChar, 32&gt; m_temporaryBuffer;
-
-    // We occasionally want to emit both a character token and an end tag
-    // token (e.g., when lexing script). We buffer the name of the end tag
-    // token here so we remember it next time we re-enter the tokenizer.
-    Vector&lt;LChar, 32&gt; m_bufferedEndTagName;
-
-    const HTMLParserOptions m_options;
-};
-
-class HTMLTokenizer::TokenPtr {
-public:
-    TokenPtr();
-    ~TokenPtr();
-
-    TokenPtr(TokenPtr&amp;&amp;);
-    TokenPtr&amp; operator=(TokenPtr&amp;&amp;) = delete;
-
-    void clear();
-
-    operator bool() const;
-
-    HTMLToken&amp; operator*() const;
-    HTMLToken* operator-&gt;() const;
-
</del><span class="cx"> private:
</span><del>-    friend class HTMLTokenizer;
-    explicit TokenPtr(HTMLToken*);
</del><ins>+    inline bool processEntity(SegmentedString&amp;);
</ins><span class="cx"> 
</span><del>-    HTMLToken* m_token { nullptr };
-};
</del><ins>+    inline void parseError();
</ins><span class="cx"> 
</span><del>-inline HTMLTokenizer::TokenPtr::TokenPtr()
-{
-}
</del><ins>+    void bufferASCIICharacter(UChar character)
+    {
+        ASSERT(character != kEndOfFileMarker);
+        ASSERT(isASCII(character));
+        m_token-&gt;appendToCharacter(static_cast&lt;LChar&gt;(character));
+    }
</ins><span class="cx"> 
</span><del>-inline HTMLTokenizer::TokenPtr::TokenPtr(HTMLToken* token)
-    : m_token(token)
-{
-}
</del><ins>+    void bufferCharacter(UChar character)
+    {
+        ASSERT(character != kEndOfFileMarker);
+        m_token-&gt;appendToCharacter(character);
+    }
+    void bufferCharacter(char) = delete;
+    void bufferCharacter(LChar) = delete;
</ins><span class="cx"> 
</span><del>-inline HTMLTokenizer::TokenPtr::~TokenPtr()
-{
-    if (m_token)
-        m_token-&gt;clear();
-}
</del><ins>+    inline bool emitAndResumeIn(SegmentedString&amp; source, State state)
+    {
+        saveEndTagNameIfNeeded();
+        m_state = state;
+        source.advanceAndUpdateLineNumber();
+        return true;
+    }
+    
+    inline bool emitAndReconsumeIn(SegmentedString&amp;, State state)
+    {
+        saveEndTagNameIfNeeded();
+        m_state = state;
+        return true;
+    }
</ins><span class="cx"> 
</span><del>-inline HTMLTokenizer::TokenPtr::TokenPtr(TokenPtr&amp;&amp; other)
-    : m_token(other.m_token)
-{
-    other.m_token = nullptr;
-}
-
-inline void HTMLTokenizer::TokenPtr::clear()
-{
-    if (m_token) {
</del><ins>+    inline bool emitEndOfFile(SegmentedString&amp; source)
+    {
+        if (haveBufferedCharacterToken())
+            return true;
+        m_state = HTMLTokenizer::DataState;
+        source.advanceAndUpdateLineNumber();
</ins><span class="cx">         m_token-&gt;clear();
</span><del>-        m_token = nullptr;
</del><ins>+        m_token-&gt;makeEndOfFile();
+        return true;
</ins><span class="cx">     }
</span><del>-}
</del><span class="cx"> 
</span><del>-inline HTMLTokenizer::TokenPtr::operator bool() const
-{
-    return m_token;
-}
</del><ins>+    inline bool flushEmitAndResumeIn(SegmentedString&amp;, State);
</ins><span class="cx"> 
</span><del>-inline HTMLToken&amp; HTMLTokenizer::TokenPtr::operator*() const
-{
-    ASSERT(m_token);
-    return *m_token;
-}
</del><ins>+    // Return whether we need to emit a character token before dealing with
+    // the buffered end tag.
+    inline bool flushBufferedEndTag(SegmentedString&amp;);
+    inline bool temporaryBufferIs(const String&amp;);
</ins><span class="cx"> 
</span><del>-inline HTMLToken* HTMLTokenizer::TokenPtr::operator-&gt;() const
-{
-    ASSERT(m_token);
-    return m_token;
-}
</del><ins>+    // Sometimes we speculatively consume input characters and we don't
+    // know whether they represent end tags or RCDATA, etc. These
+    // functions help manage these state.
+    inline void addToPossibleEndTag(LChar cc);
</ins><span class="cx"> 
</span><del>-inline HTMLTokenizer::TokenPtr HTMLTokenizer::nextToken(SegmentedString&amp; source)
-{
-    return TokenPtr(processToken(source) ? &amp;m_token : nullptr);
-}
</del><ins>+    inline void saveEndTagNameIfNeeded()
+    {
+        ASSERT(m_token-&gt;type() != HTMLToken::Uninitialized);
+        if (m_token-&gt;type() == HTMLToken::StartTag)
+            m_appropriateEndTagName = m_token-&gt;name();
+    }
+    inline bool isAppropriateEndTag();
</ins><span class="cx"> 
</span><del>-inline size_t HTMLTokenizer::numberOfBufferedCharacters() const
-{
-    // Notice that we add 2 to the length of the m_temporaryBuffer to
-    // account for the &quot;&lt;/&quot; characters, which are effecitvely buffered in
-    // the tokenizer's state machine.
-    return m_temporaryBuffer.size() ? m_temporaryBuffer.size() + 2 : 0;
-}
</del><span class="cx"> 
</span><del>-inline void HTMLTokenizer::setForceNullCharacterReplacement(bool value)
-{
-    m_forceNullCharacterReplacement = value;
-}
</del><ins>+    inline bool haveBufferedCharacterToken()
+    {
+        return m_token-&gt;type() == HTMLToken::Character;
+    }
</ins><span class="cx"> 
</span><del>-inline bool HTMLTokenizer::shouldAllowCDATA() const
-{
-    return m_shouldAllowCDATA;
-}
</del><ins>+    State m_state;
+    bool m_forceNullCharacterReplacement;
+    bool m_shouldAllowCDATA;
</ins><span class="cx"> 
</span><del>-inline void HTMLTokenizer::setShouldAllowCDATA(bool value)
-{
-    m_shouldAllowCDATA = value;
-}
</del><ins>+    // m_token is owned by the caller. If nextToken is not on the stack,
+    // this member might be pointing to unallocated memory.
+    HTMLToken* m_token;
</ins><span class="cx"> 
</span><del>-inline bool HTMLTokenizer::isInDataState() const
-{
-    return m_state == DataState;
-}
</del><ins>+    // http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-character
+    UChar m_additionalAllowedCharacter;
</ins><span class="cx"> 
</span><del>-inline void HTMLTokenizer::setDataState()
-{
-    m_state = DataState;
-}
</del><ins>+    // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
+    InputStreamPreprocessor&lt;HTMLTokenizer&gt; m_inputStreamPreprocessor;
</ins><span class="cx"> 
</span><del>-inline void HTMLTokenizer::setPLAINTEXTState()
-{
-    m_state = PLAINTEXTState;
-}
</del><ins>+    Vector&lt;UChar, 32&gt; m_appropriateEndTagName;
</ins><span class="cx"> 
</span><del>-inline void HTMLTokenizer::setRAWTEXTState()
-{
-    m_state = RAWTEXTState;
-}
</del><ins>+    // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer
+    Vector&lt;LChar, 32&gt; m_temporaryBuffer;
</ins><span class="cx"> 
</span><del>-inline void HTMLTokenizer::setRCDATAState()
-{
-    m_state = RCDATAState;
-}
</del><ins>+    // We occationally want to emit both a character token and an end tag
+    // token (e.g., when lexing script). We buffer the name of the end tag
+    // token here so we remember it next time we re-enter the tokenizer.
+    Vector&lt;LChar, 32&gt; m_bufferedEndTagName;
</ins><span class="cx"> 
</span><del>-inline void HTMLTokenizer::setScriptDataState()
-{
-    m_state = ScriptDataState;
-}
</del><ins>+    HTMLParserOptions m_options;
+};
</ins><span class="cx"> 
</span><del>-inline bool HTMLTokenizer::isNullCharacterSkippingState(State state)
-{
-    return state == DataState || state == RCDATAState || state == RAWTEXTState;
</del><span class="cx"> }
</span><span class="cx"> 
</span><del>-inline bool HTMLTokenizer::neverSkipNullCharacters() const
-{
-    return m_forceNullCharacterReplacement;
-}
-
-}
-
</del><span class="cx"> #endif
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserHTMLTreeBuildercpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -695,7 +695,7 @@
</span><span class="cx">     if (token.name() == plaintextTag) {
</span><span class="cx">         processFakePEndTagIfPInButtonScope();
</span><span class="cx">         m_tree.insertHTMLElement(&amp;token);
</span><del>-        m_parser.tokenizer().setPLAINTEXTState();
</del><ins>+        m_parser.tokenizer().setState(HTMLTokenizer::PLAINTEXTState);
</ins><span class="cx">         return;
</span><span class="cx">     }
</span><span class="cx">     if (token.name() == buttonTag) {
</span><span class="lines">@@ -799,7 +799,7 @@
</span><span class="cx">     if (token.name() == textareaTag) {
</span><span class="cx">         m_tree.insertHTMLElement(&amp;token);
</span><span class="cx">         m_shouldSkipLeadingNewline = true;
</span><del>-        m_parser.tokenizer().setRCDATAState();
</del><ins>+        m_parser.tokenizer().setState(HTMLTokenizer::RCDATAState);
</ins><span class="cx">         m_originalInsertionMode = m_insertionMode;
</span><span class="cx">         m_framesetOk = false;
</span><span class="cx">         m_insertionMode = InsertionMode::Text;
</span><span class="lines">@@ -2137,8 +2137,8 @@
</span><span class="cx">             // self-closing script tag was encountered and pre-HTML5 parser
</span><span class="cx">             // quirks are enabled. We must set the tokenizer's state to
</span><span class="cx">             // DataState explicitly if the tokenizer didn't have a chance to.
</span><del>-            ASSERT(m_parser.tokenizer().isInDataState() || m_options.usePreHTML5ParserQuirks);
-            m_parser.tokenizer().setDataState();
</del><ins>+            ASSERT(m_parser.tokenizer().state() == HTMLTokenizer::DataState || m_options.usePreHTML5ParserQuirks);
+            m_parser.tokenizer().setState(HTMLTokenizer::DataState);
</ins><span class="cx">             return;
</span><span class="cx">         }
</span><span class="cx">         m_tree.openElements().pop();
</span><span class="lines">@@ -2739,7 +2739,7 @@
</span><span class="cx"> {
</span><span class="cx">     ASSERT(token.type() == HTMLToken::StartTag);
</span><span class="cx">     m_tree.insertHTMLElement(&amp;token);
</span><del>-    m_parser.tokenizer().setRCDATAState();
</del><ins>+    m_parser.tokenizer().setState(HTMLTokenizer::RCDATAState);
</ins><span class="cx">     m_originalInsertionMode = m_insertionMode;
</span><span class="cx">     m_insertionMode = InsertionMode::Text;
</span><span class="cx"> }
</span><span class="lines">@@ -2748,7 +2748,7 @@
</span><span class="cx"> {
</span><span class="cx">     ASSERT(token.type() == HTMLToken::StartTag);
</span><span class="cx">     m_tree.insertHTMLElement(&amp;token);
</span><del>-    m_parser.tokenizer().setRAWTEXTState();
</del><ins>+    m_parser.tokenizer().setState(HTMLTokenizer::RAWTEXTState);
</ins><span class="cx">     m_originalInsertionMode = m_insertionMode;
</span><span class="cx">     m_insertionMode = InsertionMode::Text;
</span><span class="cx"> }
</span><span class="lines">@@ -2757,7 +2757,7 @@
</span><span class="cx"> {
</span><span class="cx">     ASSERT(token.type() == HTMLToken::StartTag);
</span><span class="cx">     m_tree.insertScriptElement(&amp;token);
</span><del>-    m_parser.tokenizer().setScriptDataState();
</del><ins>+    m_parser.tokenizer().setState(HTMLTokenizer::ScriptDataState);
</ins><span class="cx">     m_originalInsertionMode = m_insertionMode;
</span><span class="cx"> 
</span><span class="cx">     TextPosition position = m_parser.textPosition();
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserInputStreamPreprocessorh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/InputStreamPreprocessor.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/InputStreamPreprocessor.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/InputStreamPreprocessor.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -40,7 +40,7 @@
</span><span class="cx"> class InputStreamPreprocessor {
</span><span class="cx">     WTF_MAKE_NONCOPYABLE(InputStreamPreprocessor);
</span><span class="cx"> public:
</span><del>-    explicit InputStreamPreprocessor(Tokenizer&amp; tokenizer)
</del><ins>+    InputStreamPreprocessor(Tokenizer* tokenizer)
</ins><span class="cx">         : m_tokenizer(tokenizer)
</span><span class="cx">     {
</span><span class="cx">         reset();
</span><span class="lines">@@ -51,11 +51,8 @@
</span><span class="cx">     // Returns whether we succeeded in peeking at the next character.
</span><span class="cx">     // The only way we can fail to peek is if there are no more
</span><span class="cx">     // characters in |source| (after collapsing \r\n, etc).
</span><del>-    ALWAYS_INLINE bool peek(SegmentedString&amp; source, bool skipNullCharacters = false)
</del><ins>+    ALWAYS_INLINE bool peek(SegmentedString&amp; source)
</ins><span class="cx">     {
</span><del>-        if (source.isEmpty())
-            return false;
-
</del><span class="cx">         m_nextInputCharacter = source.currentChar();
</span><span class="cx"> 
</span><span class="cx">         // Every branch in this function is expensive, so we have a
</span><span class="lines">@@ -67,14 +64,16 @@
</span><span class="cx">             m_skipNextNewLine = false;
</span><span class="cx">             return true;
</span><span class="cx">         }
</span><del>-        return processNextInputCharacter(source, skipNullCharacters);
</del><ins>+        return processNextInputCharacter(source);
</ins><span class="cx">     }
</span><span class="cx"> 
</span><span class="cx">     // Returns whether there are more characters in |source| after advancing.
</span><del>-    ALWAYS_INLINE bool advance(SegmentedString&amp; source, bool skipNullCharacters = false)
</del><ins>+    ALWAYS_INLINE bool advance(SegmentedString&amp; source)
</ins><span class="cx">     {
</span><span class="cx">         source.advanceAndUpdateLineNumber();
</span><del>-        return peek(source, skipNullCharacters);
</del><ins>+        if (source.isEmpty())
+            return false;
+        return peek(source);
</ins><span class="cx">     }
</span><span class="cx"> 
</span><span class="cx">     bool skipNextNewLine() const { return m_skipNextNewLine; }
</span><span class="lines">@@ -86,7 +85,7 @@
</span><span class="cx">     }
</span><span class="cx"> 
</span><span class="cx"> private:
</span><del>-    bool processNextInputCharacter(SegmentedString&amp; source, bool skipNullCharacters)
</del><ins>+    bool processNextInputCharacter(SegmentedString&amp; source)
</ins><span class="cx">     {
</span><span class="cx">     ProcessAgain:
</span><span class="cx">         ASSERT(m_nextInputCharacter == source.currentChar());
</span><span class="lines">@@ -108,7 +107,7 @@
</span><span class="cx">             // by the replacement character. We suspect this is a problem with the spec as doing
</span><span class="cx">             // that filtering breaks surrogate pair handling and causes us not to match Minefield.
</span><span class="cx">             if (m_nextInputCharacter == '\0' &amp;&amp; !shouldTreatNullAsEndOfFileMarker(source)) {
</span><del>-                if (skipNullCharacters &amp;&amp; !m_tokenizer.neverSkipNullCharacters()) {
</del><ins>+                if (m_tokenizer-&gt;shouldSkipNullCharacters()) {
</ins><span class="cx">                     source.advancePastNonNewline();
</span><span class="cx">                     if (source.isEmpty())
</span><span class="cx">                         return false;
</span><span class="lines">@@ -126,7 +125,7 @@
</span><span class="cx">         return source.isClosed() &amp;&amp; source.length() == 1;
</span><span class="cx">     }
</span><span class="cx"> 
</span><del>-    Tokenizer&amp; m_tokenizer;
</del><ins>+    Tokenizer* m_tokenizer;
</ins><span class="cx"> 
</span><span class="cx">     // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character
</span><span class="cx">     UChar m_nextInputCharacter;
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserTextDocumentParsercpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/TextDocumentParser.cpp (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/TextDocumentParser.cpp        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/TextDocumentParser.cpp        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -61,7 +61,7 @@
</span><span class="cx"> 
</span><span class="cx">     // Although Text Documents expose a &quot;pre&quot; element in their DOM, they
</span><span class="cx">     // act like a &lt;plaintext&gt; tag, so we have to force plaintext mode.
</span><del>-    tokenizer().setPLAINTEXTState();
</del><ins>+    tokenizer().setState(HTMLTokenizer::PLAINTEXTState);
</ins><span class="cx"> 
</span><span class="cx">     m_haveInsertedFakePreElement = true;
</span><span class="cx"> }
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserXSSAuditorcpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/XSSAuditor.cpp (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/XSSAuditor.cpp        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/XSSAuditor.cpp        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -566,7 +566,7 @@
</span><span class="cx"> String XSSAuditor::decodedSnippetForName(const FilterTokenRequest&amp; request)
</span><span class="cx"> {
</span><span class="cx">     // Grab a fixed number of characters equal to the length of the token's name plus one (to account for the &quot;&lt;&quot;).
</span><del>-    return fullyDecodeString(request.sourceTracker.source(request.token), m_encoding).substring(0, request.token.name().size() + 1);
</del><ins>+    return fullyDecodeString(request.sourceTracker.sourceForToken(request.token), m_encoding).substring(0, request.token.name().size() + 1);
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> String XSSAuditor::decodedSnippetForAttribute(const FilterTokenRequest&amp; request, const HTMLToken::Attribute&amp; attribute, AttributeKind treatment)
</span><span class="lines">@@ -575,9 +575,9 @@
</span><span class="cx">     // for an input of |name=&quot;value&quot;|, the snippet is |name=&quot;value|. For an
</span><span class="cx">     // unquoted input of |name=value |, the snippet is |name=value|.
</span><span class="cx">     // FIXME: We should grab one character before the name also.
</span><del>-    unsigned start = attribute.startOffset;
-    unsigned end = attribute.endOffset;
-    String decodedSnippet = fullyDecodeString(request.sourceTracker.source(request.token, start, end), m_encoding);
</del><ins>+    unsigned start = attribute.nameRange.start;
+    unsigned end = attribute.valueRange.end;
+    String decodedSnippet = fullyDecodeString(request.sourceTracker.sourceForToken(request.token).substring(start, end - start), m_encoding);
</ins><span class="cx">     decodedSnippet.truncate(kMaximumFragmentLengthTarget);
</span><span class="cx">     if (treatment == SrcLikeAttribute) {
</span><span class="cx">         int slashCount = 0;
</span><span class="lines">@@ -630,7 +630,7 @@
</span><span class="cx"> 
</span><span class="cx"> String XSSAuditor::decodedSnippetForJavaScript(const FilterTokenRequest&amp; request)
</span><span class="cx"> {
</span><del>-    String string = request.sourceTracker.source(request.token);
</del><ins>+    String string = request.sourceTracker.sourceForToken(request.token);
</ins><span class="cx">     size_t startPosition = 0;
</span><span class="cx">     size_t endPosition = string.length();
</span><span class="cx">     size_t foundPosition = notFound;
</span><span class="lines">@@ -737,4 +737,12 @@
</span><span class="cx">     return (m_documentURL.host() == resourceURL.host() &amp;&amp; resourceURL.query().isEmpty());
</span><span class="cx"> }
</span><span class="cx"> 
</span><ins>+bool XSSAuditor::isSafeToSendToAnotherThread() const
+{
+    return m_documentURL.isSafeToSendToAnotherThread()
+        &amp;&amp; m_decodedURL.isSafeToSendToAnotherThread()
+        &amp;&amp; m_decodedHTTPBody.isSafeToSendToAnotherThread()
+        &amp;&amp; m_cachedDecodedSnippet.isSafeToSendToAnotherThread();
+}
+
</ins><span class="cx"> } // namespace WebCore
</span></span></pre></div>
<a id="trunkSourceWebCorehtmlparserXSSAuditorh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/parser/XSSAuditor.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/parser/XSSAuditor.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/parser/XSSAuditor.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -61,6 +61,7 @@
</span><span class="cx">     void initForFragment();
</span><span class="cx"> 
</span><span class="cx">     std::unique_ptr&lt;XSSInfo&gt; filterToken(const FilterTokenRequest&amp;);
</span><ins>+    bool isSafeToSendToAnotherThread() const;
</ins><span class="cx"> 
</span><span class="cx"> private:
</span><span class="cx">     static const size_t kMaximumFragmentLengthTarget = 100;
</span></span></pre></div>
<a id="trunkSourceWebCorehtmltrackWebVTTTokenizercpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/track/WebVTTTokenizer.cpp (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/track/WebVTTTokenizer.cpp        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/track/WebVTTTokenizer.cpp        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -1,6 +1,6 @@
</span><span class="cx"> /*
</span><span class="cx">  * Copyright (C) 2011, 2013 Google Inc.  All rights reserved.
</span><del>- * Copyright (C) 2014-2015 Apple Inc.  All rights reserved.
</del><ins>+ * Copyright (C) 2014 Apple Inc.  All rights reserved.
</ins><span class="cx">  *
</span><span class="cx">  * Redistribution and use in source and binary forms, with or without
</span><span class="cx">  * modification, are permitted provided that the following conditions are
</span><span class="lines">@@ -41,15 +41,19 @@
</span><span class="cx"> 
</span><span class="cx"> namespace WebCore {
</span><span class="cx"> 
</span><del>-#define WEBVTT_ADVANCE_TO(stateName)                        \
-    do {                                                    \
-        ASSERT(!m_input.isEmpty());                         \
-        m_preprocessor.advance(m_input);                    \
-        character = m_preprocessor.nextInputCharacter();    \
-        goto stateName;                                     \
</del><ins>+#define WEBVTT_BEGIN_STATE(stateName) case stateName: stateName:
+#define WEBVTT_ADVANCE_TO(stateName)                               \
+    do {                                                           \
+        state = stateName;                                         \
+        ASSERT(!m_input.isEmpty());                                \
+        m_inputStreamPreprocessor.advance(m_input);                \
+        cc = m_inputStreamPreprocessor.nextInputCharacter();       \
+        goto stateName;                                            \
</ins><span class="cx">     } while (false)
</span><ins>+
</ins><span class="cx">     
</span><del>-template&lt;unsigned charactersCount&gt; ALWAYS_INLINE bool equalLiteral(const StringBuilder&amp; s, const char (&amp;characters)[charactersCount])
</del><ins>+template&lt;unsigned charactersCount&gt;
+ALWAYS_INLINE bool equalLiteral(const StringBuilder&amp; s, const char (&amp;characters)[charactersCount])
</ins><span class="cx"> {
</span><span class="cx">     return WTF::equal(s, reinterpret_cast&lt;const LChar*&gt;(characters), charactersCount - 1);
</span><span class="cx"> }
</span><span class="lines">@@ -75,7 +79,7 @@
</span><span class="cx"> 
</span><span class="cx"> WebVTTTokenizer::WebVTTTokenizer(const String&amp; input)
</span><span class="cx">     : m_input(input)
</span><del>-    , m_preprocessor(*this)
</del><ins>+    , m_inputStreamPreprocessor(this)
</ins><span class="cx"> {
</span><span class="cx">     // Append an EOF marker and close the input &quot;stream&quot;.
</span><span class="cx">     ASSERT(!m_input.isClosed());
</span><span class="lines">@@ -85,12 +89,12 @@
</span><span class="cx"> 
</span><span class="cx"> bool WebVTTTokenizer::nextToken(WebVTTToken&amp; token)
</span><span class="cx"> {
</span><del>-    if (m_input.isEmpty() || !m_preprocessor.peek(m_input))
</del><ins>+    if (m_input.isEmpty() || !m_inputStreamPreprocessor.peek(m_input))
</ins><span class="cx">         return false;
</span><span class="cx"> 
</span><del>-    UChar character = m_preprocessor.nextInputCharacter();
-    if (character == kEndOfFileMarker) {
-        m_preprocessor.advance(m_input);
</del><ins>+    UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
+    if (cc == kEndOfFileMarker) {
+        m_inputStreamPreprocessor.advance(m_input);
</ins><span class="cx">         return false;
</span><span class="cx">     }
</span><span class="cx"> 
</span><span class="lines">@@ -98,134 +102,169 @@
</span><span class="cx">     StringBuilder result;
</span><span class="cx">     StringBuilder classes;
</span><span class="cx"> 
</span><del>-// 4.8.10.13.4 WebVTT cue text tokenizer
-DataState:
-    if (character == '&amp;') {
-        buffer.append('&amp;');
-        WEBVTT_ADVANCE_TO(EscapeState);
-    } else if (character == '&lt;') {
-        if (result.isEmpty())
-            WEBVTT_ADVANCE_TO(TagState);
</del><ins>+    enum {
+        DataState,
+        EscapeState,
+        TagState,
+        StartTagState,
+        StartTagClassState,
+        StartTagAnnotationState,
+        EndTagState,
+        TimestampTagState,
+    } state = DataState;
+
+    // 4.8.10.13.4 WebVTT cue text tokenizer
+    switch (state) {
+    WEBVTT_BEGIN_STATE(DataState) {
+        if (cc == '&amp;') {
+            buffer.append(static_cast&lt;LChar&gt;(cc));
+            WEBVTT_ADVANCE_TO(EscapeState);
+        } else if (cc == '&lt;') {
+            if (result.isEmpty())
+                WEBVTT_ADVANCE_TO(TagState);
+            else {
+                // We don't want to advance input or perform a state transition - just return a (new) token.
+                // (On the next call to nextToken we will see '&lt;' again, but take the other branch in this if instead.)
+                return emitToken(token, WebVTTToken::StringToken(result.toString()));
+            }
+        } else if (cc == kEndOfFileMarker)
+            return advanceAndEmitToken(m_input, token, WebVTTToken::StringToken(result.toString()));
</ins><span class="cx">         else {
</span><del>-            // We don't want to advance input or perform a state transition - just return a (new) token.
-            // (On the next call to nextToken we will see '&lt;' again, but take the other branch in this if instead.)
</del><ins>+            result.append(cc);
+            WEBVTT_ADVANCE_TO(DataState);
+        }
+    }
+    END_STATE()
+
+    WEBVTT_BEGIN_STATE(EscapeState) {
+        if (cc == ';') {
+            if (equalLiteral(buffer, &quot;&amp;amp&quot;))
+                result.append('&amp;');
+            else if (equalLiteral(buffer, &quot;&amp;lt&quot;))
+                result.append('&lt;');
+            else if (equalLiteral(buffer, &quot;&amp;gt&quot;))
+                result.append('&gt;');
+            else if (equalLiteral(buffer, &quot;&amp;lrm&quot;))
+                result.append(leftToRightMark);
+            else if (equalLiteral(buffer, &quot;&amp;rlm&quot;))
+                result.append(rightToLeftMark);
+            else if (equalLiteral(buffer, &quot;&amp;nbsp&quot;))
+                result.append(noBreakSpace);
+            else {
+                buffer.append(static_cast&lt;LChar&gt;(cc));
+                result.append(buffer);
+            }
+            buffer.clear();
+            WEBVTT_ADVANCE_TO(DataState);
+        } else if (isASCIIAlphanumeric(cc)) {
+            buffer.append(static_cast&lt;LChar&gt;(cc));
+            WEBVTT_ADVANCE_TO(EscapeState);
+        } else if (cc == '&lt;') {
+            result.append(buffer);
</ins><span class="cx">             return emitToken(token, WebVTTToken::StringToken(result.toString()));
</span><ins>+        } else if (cc == kEndOfFileMarker) {
+            result.append(buffer);
+            return advanceAndEmitToken(m_input, token, WebVTTToken::StringToken(result.toString()));
+        } else {
+            result.append(buffer);
+            buffer.clear();
+
+            if (cc == '&amp;') {
+                buffer.append(static_cast&lt;LChar&gt;(cc));
+                WEBVTT_ADVANCE_TO(EscapeState);
+            }
+            result.append(cc);
+            WEBVTT_ADVANCE_TO(DataState);
</ins><span class="cx">         }
</span><del>-    } else if (character == kEndOfFileMarker)
-        return advanceAndEmitToken(m_input, token, WebVTTToken::StringToken(result.toString()));
-    else {
-        result.append(character);
-        WEBVTT_ADVANCE_TO(DataState);
</del><span class="cx">     }
</span><ins>+    END_STATE()
</ins><span class="cx"> 
</span><del>-EscapeState:
-    if (character == ';') {
-        if (equalLiteral(buffer, &quot;&amp;amp&quot;))
-            result.append('&amp;');
-        else if (equalLiteral(buffer, &quot;&amp;lt&quot;))
-            result.append('&lt;');
-        else if (equalLiteral(buffer, &quot;&amp;gt&quot;))
-            result.append('&gt;');
-        else if (equalLiteral(buffer, &quot;&amp;lrm&quot;))
-            result.append(leftToRightMark);
-        else if (equalLiteral(buffer, &quot;&amp;rlm&quot;))
-            result.append(rightToLeftMark);
-        else if (equalLiteral(buffer, &quot;&amp;nbsp&quot;))
-            result.append(noBreakSpace);
</del><ins>+    WEBVTT_BEGIN_STATE(TagState) {
+        if (isTokenizerWhitespace(cc)) {
+            ASSERT(result.isEmpty());
+            WEBVTT_ADVANCE_TO(StartTagAnnotationState);
+        } else if (cc == '.') {
+            ASSERT(result.isEmpty());
+            WEBVTT_ADVANCE_TO(StartTagClassState);
+        } else if (cc == '/') {
+            WEBVTT_ADVANCE_TO(EndTagState);
+        } else if (WTF::isASCIIDigit(cc)) {
+            result.append(cc);
+            WEBVTT_ADVANCE_TO(TimestampTagState);
+        } else if (cc == '&gt;' || cc == kEndOfFileMarker) {
+            ASSERT(result.isEmpty());
+            return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString()));
+        } else {
+            result.append(cc);
+            WEBVTT_ADVANCE_TO(StartTagState);
+        }
+    }
+    END_STATE()
+
+    WEBVTT_BEGIN_STATE(StartTagState) {
+        if (isTokenizerWhitespace(cc))
+            WEBVTT_ADVANCE_TO(StartTagAnnotationState);
+        else if (cc == '.')
+            WEBVTT_ADVANCE_TO(StartTagClassState);
+        else if (cc == '&gt;' || cc == kEndOfFileMarker)
+            return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString()));
</ins><span class="cx">         else {
</span><del>-            buffer.append(character);
-            result.append(buffer);
</del><ins>+            result.append(cc);
+            WEBVTT_ADVANCE_TO(StartTagState);
</ins><span class="cx">         }
</span><del>-        buffer.clear();
-        WEBVTT_ADVANCE_TO(DataState);
-    } else if (isASCIIAlphanumeric(character)) {
-        buffer.append(character);
-        WEBVTT_ADVANCE_TO(EscapeState);
-    } else if (character == '&lt;') {
-        result.append(buffer);
-        return emitToken(token, WebVTTToken::StringToken(result.toString()));
-    } else if (character == kEndOfFileMarker) {
-        result.append(buffer);
-        return advanceAndEmitToken(m_input, token, WebVTTToken::StringToken(result.toString()));
-    } else {
-        result.append(buffer);
-        buffer.clear();
</del><ins>+    }
+    END_STATE()
</ins><span class="cx"> 
</span><del>-        if (character == '&amp;') {
-            buffer.append('&amp;');
-            WEBVTT_ADVANCE_TO(EscapeState);
</del><ins>+    WEBVTT_BEGIN_STATE(StartTagClassState) {
+        if (isTokenizerWhitespace(cc)) {
+            addNewClass(classes, buffer);
+            buffer.clear();
+            WEBVTT_ADVANCE_TO(StartTagAnnotationState);
+        } else if (cc == '.') {
+            addNewClass(classes, buffer);
+            buffer.clear();
+            WEBVTT_ADVANCE_TO(StartTagClassState);
+        } else if (cc == '&gt;' || cc == kEndOfFileMarker) {
+            addNewClass(classes, buffer);
+            buffer.clear();
+            return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString(), classes.toAtomicString()));
+        } else {
+            buffer.append(cc);
+            WEBVTT_ADVANCE_TO(StartTagClassState);
</ins><span class="cx">         }
</span><del>-        result.append(character);
-        WEBVTT_ADVANCE_TO(DataState);
</del><ins>+
</ins><span class="cx">     }
</span><ins>+    END_STATE()
</ins><span class="cx"> 
</span><del>-TagState:
-    if (isTokenizerWhitespace(character)) {
-        ASSERT(result.isEmpty());
</del><ins>+    WEBVTT_BEGIN_STATE(StartTagAnnotationState) {
+        if (cc == '&gt;' || cc == kEndOfFileMarker) {
+            return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString(), classes.toAtomicString(), buffer.toAtomicString()));
+        }
+        buffer.append(cc);
</ins><span class="cx">         WEBVTT_ADVANCE_TO(StartTagAnnotationState);
</span><del>-    } else if (character == '.') {
-        ASSERT(result.isEmpty());
-        WEBVTT_ADVANCE_TO(StartTagClassState);
-    } else if (character == '/') {
</del><ins>+    }
+    END_STATE()
+    
+    WEBVTT_BEGIN_STATE(EndTagState) {
+        if (cc == '&gt;' || cc == kEndOfFileMarker)
+            return advanceAndEmitToken(m_input, token, WebVTTToken::EndTag(result.toString()));
+        result.append(cc);
</ins><span class="cx">         WEBVTT_ADVANCE_TO(EndTagState);
</span><del>-    } else if (WTF::isASCIIDigit(character)) {
-        result.append(character);
-        WEBVTT_ADVANCE_TO(TimestampTagState);
-    } else if (character == '&gt;' || character == kEndOfFileMarker) {
-        ASSERT(result.isEmpty());
-        return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString()));
-    } else {
-        result.append(character);
-        WEBVTT_ADVANCE_TO(StartTagState);
</del><span class="cx">     }
</span><ins>+    END_STATE()
</ins><span class="cx"> 
</span><del>-StartTagState:
-    if (isTokenizerWhitespace(character))
-        WEBVTT_ADVANCE_TO(StartTagAnnotationState);
-    else if (character == '.')
-        WEBVTT_ADVANCE_TO(StartTagClassState);
-    else if (character == '&gt;' || character == kEndOfFileMarker)
-        return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString()));
-    else {
-        result.append(character);
-        WEBVTT_ADVANCE_TO(StartTagState);
</del><ins>+    WEBVTT_BEGIN_STATE(TimestampTagState) {
+        if (cc == '&gt;' || cc == kEndOfFileMarker)
+            return advanceAndEmitToken(m_input, token, WebVTTToken::TimestampTag(result.toString()));
+        result.append(cc);
+        WEBVTT_ADVANCE_TO(TimestampTagState);
</ins><span class="cx">     }
</span><ins>+    END_STATE()
</ins><span class="cx"> 
</span><del>-StartTagClassState:
-    if (isTokenizerWhitespace(character)) {
-        addNewClass(classes, buffer);
-        buffer.clear();
-        WEBVTT_ADVANCE_TO(StartTagAnnotationState);
-    } else if (character == '.') {
-        addNewClass(classes, buffer);
-        buffer.clear();
-        WEBVTT_ADVANCE_TO(StartTagClassState);
-    } else if (character == '&gt;' || character == kEndOfFileMarker) {
-        addNewClass(classes, buffer);
-        buffer.clear();
-        return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString(), classes.toAtomicString()));
-    } else {
-        buffer.append(character);
-        WEBVTT_ADVANCE_TO(StartTagClassState);
</del><span class="cx">     }
</span><span class="cx"> 
</span><del>-StartTagAnnotationState:
-    if (character == '&gt;' || character == kEndOfFileMarker)
-        return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString(), classes.toAtomicString(), buffer.toAtomicString()));
-    buffer.append(character);
-    WEBVTT_ADVANCE_TO(StartTagAnnotationState);
-
-EndTagState:
-    if (character == '&gt;' || character == kEndOfFileMarker)
-        return advanceAndEmitToken(m_input, token, WebVTTToken::EndTag(result.toString()));
-    result.append(character);
-    WEBVTT_ADVANCE_TO(EndTagState);
-
-TimestampTagState:
-    if (character == '&gt;' || character == kEndOfFileMarker)
-        return advanceAndEmitToken(m_input, token, WebVTTToken::TimestampTag(result.toString()));
-    result.append(character);
-    WEBVTT_ADVANCE_TO(TimestampTagState);
</del><ins>+    ASSERT_NOT_REACHED();
+    return false;
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> }
</span></span></pre></div>
<a id="trunkSourceWebCorehtmltrackWebVTTTokenizerh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/html/track/WebVTTTokenizer.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/html/track/WebVTTTokenizer.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/html/track/WebVTTTokenizer.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -40,15 +40,19 @@
</span><span class="cx"> namespace WebCore {
</span><span class="cx"> 
</span><span class="cx"> class WebVTTTokenizer {
</span><ins>+    WTF_MAKE_NONCOPYABLE(WebVTTTokenizer);
</ins><span class="cx"> public:
</span><span class="cx">     explicit WebVTTTokenizer(const String&amp;);
</span><ins>+
</ins><span class="cx">     bool nextToken(WebVTTToken&amp;);
</span><span class="cx"> 
</span><del>-    static bool neverSkipNullCharacters() { return false; }
</del><ins>+    inline bool shouldSkipNullCharacters() const { return true; }
</ins><span class="cx"> 
</span><span class="cx"> private:
</span><span class="cx">     SegmentedString m_input;
</span><del>-    InputStreamPreprocessor&lt;WebVTTTokenizer&gt; m_preprocessor;
</del><ins>+
+    // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
+    InputStreamPreprocessor&lt;WebVTTTokenizer&gt; m_inputStreamPreprocessor;
</ins><span class="cx"> };
</span><span class="cx"> 
</span><span class="cx"> }
</span></span></pre></div>
<a id="trunkSourceWebCoreplatformtextSegmentedStringcpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/platform/text/SegmentedString.cpp (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/platform/text/SegmentedString.cpp        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/platform/text/SegmentedString.cpp        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -20,8 +20,6 @@
</span><span class="cx"> #include &quot;config.h&quot;
</span><span class="cx"> #include &quot;SegmentedString.h&quot;
</span><span class="cx"> 
</span><del>-#include &lt;wtf/text/TextPosition.h&gt;
-
</del><span class="cx"> namespace WebCore {
</span><span class="cx"> 
</span><span class="cx"> SegmentedString::SegmentedString(const SegmentedString&amp; other)
</span><span class="lines">@@ -46,7 +44,7 @@
</span><span class="cx">         m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0;
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-SegmentedString&amp; SegmentedString::operator=(const SegmentedString&amp; other)
</del><ins>+const SegmentedString&amp; SegmentedString::operator=(const SegmentedString&amp; other)
</ins><span class="cx"> {
</span><span class="cx">     m_pushedChar1 = other.m_pushedChar1;
</span><span class="cx">     m_pushedChar2 = other.m_pushedChar2;
</span><span class="lines">@@ -132,14 +130,14 @@
</span><span class="cx">     m_empty = false;
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-void SegmentedString::pushBack(const SegmentedSubstring&amp; s)
</del><ins>+void SegmentedString::prepend(const SegmentedSubstring&amp; s)
</ins><span class="cx"> {
</span><del>-    ASSERT(!m_pushedChar1);
</del><ins>+    ASSERT(!escaped());
</ins><span class="cx">     ASSERT(!s.numberOfCharactersConsumed());
</span><span class="cx">     if (!s.m_length)
</span><span class="cx">         return;
</span><span class="cx"> 
</span><del>-    // FIXME: We're assuming that the characters were originally consumed by
</del><ins>+    // FIXME: We're assuming that the prepend were originally consumed by
</ins><span class="cx">     //        this SegmentedString.  We're also ASSERTing that s is a fresh
</span><span class="cx">     //        SegmentedSubstring.  These assumptions are sufficient for our
</span><span class="cx">     //        current use, but we might need to handle the more elaborate
</span><span class="lines">@@ -168,7 +166,7 @@
</span><span class="cx"> void SegmentedString::append(const SegmentedString&amp; s)
</span><span class="cx"> {
</span><span class="cx">     ASSERT(!m_closed);
</span><del>-    ASSERT(!s.m_pushedChar1);
</del><ins>+    ASSERT(!s.escaped());
</ins><span class="cx">     append(s.m_currentString);
</span><span class="cx">     if (s.isComposite()) {
</span><span class="cx">         Deque&lt;SegmentedSubstring&gt;::const_iterator it = s.m_substrings.begin();
</span><span class="lines">@@ -179,17 +177,17 @@
</span><span class="cx">     m_currentChar = m_pushedChar1 ? m_pushedChar1 : (m_currentString.m_length ? m_currentString.getCurrentChar() : 0);
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-void SegmentedString::pushBack(const SegmentedString&amp; s)
</del><ins>+void SegmentedString::prepend(const SegmentedString&amp; s)
</ins><span class="cx"> {
</span><del>-    ASSERT(!m_pushedChar1);
-    ASSERT(!s.m_pushedChar1);
</del><ins>+    ASSERT(!escaped());
+    ASSERT(!s.escaped());
</ins><span class="cx">     if (s.isComposite()) {
</span><span class="cx">         Deque&lt;SegmentedSubstring&gt;::const_reverse_iterator it = s.m_substrings.rbegin();
</span><span class="cx">         Deque&lt;SegmentedSubstring&gt;::const_reverse_iterator e = s.m_substrings.rend();
</span><span class="cx">         for (; it != e; ++it)
</span><del>-            pushBack(*it);
</del><ins>+            prepend(*it);
</ins><span class="cx">     }
</span><del>-    pushBack(s.m_currentString);
</del><ins>+    prepend(s.m_currentString);
</ins><span class="cx">     m_currentChar = m_pushedChar1 ? m_pushedChar1 : (m_currentString.m_length ? m_currentString.getCurrentChar() : 0);
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="lines">@@ -230,12 +228,12 @@
</span><span class="cx">     return result.toString();
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-void SegmentedString::advancePastNonNewlines(unsigned count, UChar* consumedCharacters)
</del><ins>+void SegmentedString::advance(unsigned count, UChar* consumedCharacters)
</ins><span class="cx"> {
</span><span class="cx">     ASSERT_WITH_SECURITY_IMPLICATION(count &lt;= length());
</span><span class="cx">     for (unsigned i = 0; i &lt; count; ++i) {
</span><span class="cx">         consumedCharacters[i] = currentChar();
</span><del>-        advancePastNonNewline();
</del><ins>+        advance();
</ins><span class="cx">     }
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="lines">@@ -355,7 +353,8 @@
</span><span class="cx"> 
</span><span class="cx"> OrdinalNumber SegmentedString::currentColumn() const
</span><span class="cx"> {
</span><del>-    return OrdinalNumber::fromZeroBasedInt(numberOfCharactersConsumed() - m_numberOfCharactersConsumedPriorToCurrentLine);
</del><ins>+    int zeroBasedColumn = numberOfCharactersConsumed() - m_numberOfCharactersConsumedPriorToCurrentLine;
+    return OrdinalNumber::fromZeroBasedInt(zeroBasedColumn);
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> void SegmentedString::setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength)
</span><span class="lines">@@ -364,18 +363,4 @@
</span><span class="cx">     m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + prologLength - columnAftreProlog.zeroBasedInt();
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-SegmentedString::AdvancePastResult SegmentedString::advancePastSlowCase(const char* literal, bool caseSensitive)
-{
-    unsigned length = strlen(literal);
-    if (length &gt; this-&gt;length())
-        return NotEnoughCharacters;
-    UChar* consumedCharacters;
-    String consumedString = String::createUninitialized(length, consumedCharacters);
-    advancePastNonNewlines(length, consumedCharacters);
-    if (consumedString.startsWith(literal, caseSensitive))
-        return DidMatch;
-    pushBack(SegmentedString(consumedString));
-    return DidNotMatch;
</del><span class="cx"> }
</span><del>-
-}
</del></span></pre></div>
<a id="trunkSourceWebCoreplatformtextSegmentedStringh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/platform/text/SegmentedString.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/platform/text/SegmentedString.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/platform/text/SegmentedString.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -1,5 +1,5 @@
</span><span class="cx"> /*
</span><del>-    Copyright (C) 2004-2008, 2015 Apple Inc. All rights reserved.
</del><ins>+    Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
</ins><span class="cx"> 
</span><span class="cx">     This library is free software; you can redistribute it and/or
</span><span class="cx">     modify it under the terms of the GNU Library General Public
</span><span class="lines">@@ -22,6 +22,8 @@
</span><span class="cx"> 
</span><span class="cx"> #include &lt;wtf/Deque.h&gt;
</span><span class="cx"> #include &lt;wtf/text/StringBuilder.h&gt;
</span><ins>+#include &lt;wtf/text/TextPosition.h&gt;
+#include &lt;wtf/text/WTFString.h&gt;
</ins><span class="cx"> 
</span><span class="cx"> namespace WebCore {
</span><span class="cx"> 
</span><span class="lines">@@ -168,14 +170,16 @@
</span><span class="cx">     }
</span><span class="cx"> 
</span><span class="cx">     SegmentedString(const SegmentedString&amp;);
</span><del>-    SegmentedString&amp; operator=(const SegmentedString&amp;);
</del><span class="cx"> 
</span><ins>+    const SegmentedString&amp; operator=(const SegmentedString&amp;);
+
</ins><span class="cx">     void clear();
</span><span class="cx">     void close();
</span><span class="cx"> 
</span><span class="cx">     void append(const SegmentedString&amp;);
</span><del>-    void pushBack(const SegmentedString&amp;);
</del><ins>+    void prepend(const SegmentedString&amp;);
</ins><span class="cx"> 
</span><ins>+    bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
</ins><span class="cx">     void setExcludeLineNumbers();
</span><span class="cx"> 
</span><span class="cx">     void push(UChar c)
</span><span class="lines">@@ -195,10 +199,15 @@
</span><span class="cx"> 
</span><span class="cx">     bool isClosed() const { return m_closed; }
</span><span class="cx"> 
</span><del>-    enum AdvancePastResult { DidNotMatch, DidMatch, NotEnoughCharacters };
-    template&lt;unsigned length&gt; AdvancePastResult advancePast(const char (&amp;literal)[length]) { return advancePast(literal, length - 1, true); }
-    template&lt;unsigned length&gt; AdvancePastResult advancePastIgnoringCase(const char (&amp;literal)[length]) { return advancePast(literal, length - 1, false); }
</del><ins>+    enum LookAheadResult {
+        DidNotMatch,
+        DidMatch,
+        NotEnoughCharacters,
+    };
</ins><span class="cx"> 
</span><ins>+    LookAheadResult lookAhead(const String&amp; string) { return lookAheadInline(string, true); }
+    LookAheadResult lookAheadIgnoringCase(const String&amp; string) { return lookAheadInline(string, false); }
+
</ins><span class="cx">     void advance()
</span><span class="cx">     {
</span><span class="cx">         if (m_fastPathFlags &amp; Use8BitAdvance) {
</span><span class="lines">@@ -217,7 +226,7 @@
</span><span class="cx">         (this-&gt;*m_advanceFunc)();
</span><span class="cx">     }
</span><span class="cx"> 
</span><del>-    void advanceAndUpdateLineNumber()
</del><ins>+    inline void advanceAndUpdateLineNumber()
</ins><span class="cx">     {
</span><span class="cx">         if (m_fastPathFlags &amp; Use8BitAdvance) {
</span><span class="cx">             ASSERT(!m_pushedChar1);
</span><span class="lines">@@ -244,6 +253,18 @@
</span><span class="cx">         (this-&gt;*m_advanceAndUpdateLineNumberFunc)();
</span><span class="cx">     }
</span><span class="cx"> 
</span><ins>+    void advanceAndASSERT(UChar expectedCharacter)
+    {
+        ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter);
+        advance();
+    }
+
+    void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
+    {
+        ASSERT_UNUSED(expectedCharacter, u_foldCase(currentChar(), U_FOLD_CASE_DEFAULT) == u_foldCase(expectedCharacter, U_FOLD_CASE_DEFAULT));
+        advance();
+    }
+
</ins><span class="cx">     void advancePastNonNewline()
</span><span class="cx">     {
</span><span class="cx">         ASSERT(currentChar() != '\n');
</span><span class="lines">@@ -265,6 +286,12 @@
</span><span class="cx">         advanceAndUpdateLineNumberSlowCase();
</span><span class="cx">     }
</span><span class="cx"> 
</span><ins>+    // Writes the consumed characters into consumedCharacters, which must
+    // have space for at least |count| characters.
+    void advance(unsigned count, UChar* consumedCharacters);
+
+    bool escaped() const { return m_pushedChar1; }
+
</ins><span class="cx">     int numberOfCharactersConsumed() const
</span><span class="cx">     {
</span><span class="cx">         int numberOfPushedCharacters = 0;
</span><span class="lines">@@ -280,12 +307,12 @@
</span><span class="cx"> 
</span><span class="cx">     UChar currentChar() const { return m_currentChar; }    
</span><span class="cx"> 
</span><ins>+    // The method is moderately slow, comparing to currentLine method.
</ins><span class="cx">     OrdinalNumber currentColumn() const;
</span><span class="cx">     OrdinalNumber currentLine() const;
</span><del>-
-    // Sets value of line/column variables. Column is specified indirectly by a parameter columnAfterProlog
</del><ins>+    // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog
</ins><span class="cx">     // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
</span><del>-    void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAfterProlog, int prologLength);
</del><ins>+    void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength);
</ins><span class="cx"> 
</span><span class="cx"> private:
</span><span class="cx">     enum FastPathFlags {
</span><span class="lines">@@ -295,7 +322,7 @@
</span><span class="cx">     };
</span><span class="cx"> 
</span><span class="cx">     void append(const SegmentedSubstring&amp;);
</span><del>-    void pushBack(const SegmentedSubstring&amp;);
</del><ins>+    void prepend(const SegmentedSubstring&amp;);
</ins><span class="cx"> 
</span><span class="cx">     void advance8();
</span><span class="cx">     void advance16();
</span><span class="lines">@@ -347,13 +374,32 @@
</span><span class="cx">         updateSlowCaseFunctionPointers();
</span><span class="cx">     }
</span><span class="cx"> 
</span><del>-    // Writes consumed characters into consumedCharacters, which must have space for at least |count| characters.
-    void advancePastNonNewlines(unsigned count);
-    void advancePastNonNewlines(unsigned count, UChar* consumedCharacters);
</del><ins>+    inline LookAheadResult lookAheadInline(const String&amp; string, bool caseSensitive)
+    {
+        if (!m_pushedChar1 &amp;&amp; string.length() &lt;= static_cast&lt;unsigned&gt;(m_currentString.m_length)) {
+            String currentSubstring = m_currentString.currentSubString(string.length());
+            if (currentSubstring.startsWith(string, caseSensitive))
+                return DidMatch;
+            return DidNotMatch;
+        }
+        return lookAheadSlowCase(string, caseSensitive);
+    }
+    
+    LookAheadResult lookAheadSlowCase(const String&amp; string, bool caseSensitive)
+    {
+        unsigned count = string.length();
+        if (count &gt; length())
+            return NotEnoughCharacters;
+        UChar* consumedCharacters;
+        String consumedString = String::createUninitialized(count, consumedCharacters);
+        advance(count, consumedCharacters);
+        LookAheadResult result = DidNotMatch;
+        if (consumedString.startsWith(string, caseSensitive))
+            result = DidMatch;
+        prepend(SegmentedString(consumedString));
+        return result;
+    }
</ins><span class="cx"> 
</span><del>-    AdvancePastResult advancePast(const char* literal, unsigned length, bool caseSensitive);
-    AdvancePastResult advancePastSlowCase(const char* literal, bool caseSensitive);
-
</del><span class="cx">     bool isComposite() const { return !m_substrings.isEmpty(); }
</span><span class="cx"> 
</span><span class="cx">     UChar m_pushedChar1;
</span><span class="lines">@@ -371,27 +417,6 @@
</span><span class="cx">     void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)();
</span><span class="cx"> };
</span><span class="cx"> 
</span><del>-inline void SegmentedString::advancePastNonNewlines(unsigned count)
-{
-    for (unsigned i = 0; i &lt; count; ++i)
-        advancePastNonNewline();
</del><span class="cx"> }
</span><span class="cx"> 
</span><del>-inline SegmentedString::AdvancePastResult SegmentedString::advancePast(const char* literal, unsigned length, bool caseSensitive)
-{
-    ASSERT(strlen(literal) == length);
-    ASSERT(!strchr(literal, '\n'));
-    if (!m_pushedChar1) {
-        if (length &lt;= static_cast&lt;unsigned&gt;(m_currentString.m_length)) {
-            if (!m_currentString.currentSubString(length).startsWith(literal, caseSensitive))
-                return DidNotMatch;
-            advancePastNonNewlines(length);
-            return DidMatch;
-        }
-    }
-    return advancePastSlowCase(literal, caseSensitive);
-}
-
-}
-
</del><span class="cx"> #endif
</span></span></pre></div>
<a id="trunkSourceWebCorexmlparserCharacterReferenceParserInlinesh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/xml/parser/CharacterReferenceParserInlines.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/xml/parser/CharacterReferenceParserInlines.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/xml/parser/CharacterReferenceParserInlines.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -31,6 +31,11 @@
</span><span class="cx"> 
</span><span class="cx"> namespace WebCore {
</span><span class="cx"> 
</span><ins>+inline bool isHexDigit(UChar cc)
+{
+    return (cc &gt;= '0' &amp;&amp; cc &lt;= '9') || (cc &gt;= 'a' &amp;&amp; cc &lt;= 'f') || (cc &gt;= 'A' &amp;&amp; cc &lt;= 'F');
+}
+
</ins><span class="cx"> inline void unconsumeCharacters(SegmentedString&amp; source, const StringBuilder&amp; consumedCharacters)
</span><span class="cx"> {
</span><span class="cx">     if (consumedCharacters.length() == 1)
</span><span class="lines">@@ -39,7 +44,7 @@
</span><span class="cx">         source.push(consumedCharacters[0]);
</span><span class="cx">         source.push(consumedCharacters[1]);
</span><span class="cx">     } else
</span><del>-        source.pushBack(SegmentedString(consumedCharacters.toStringPreserveCapacity()));
</del><ins>+        source.prepend(SegmentedString(consumedCharacters.toStringPreserveCapacity()));
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> template &lt;typename ParserFunctions&gt;
</span><span class="lines">@@ -49,7 +54,7 @@
</span><span class="cx">     ASSERT(!notEnoughCharacters);
</span><span class="cx">     ASSERT(decodedCharacter.isEmpty());
</span><span class="cx">     
</span><del>-    enum {
</del><ins>+    enum EntityState {
</ins><span class="cx">         Initial,
</span><span class="cx">         Number,
</span><span class="cx">         MaybeHexLowerCaseX,
</span><span class="lines">@@ -57,97 +62,111 @@
</span><span class="cx">         Hex,
</span><span class="cx">         Decimal,
</span><span class="cx">         Named
</span><del>-    } state = Initial;
</del><ins>+    };
+    EntityState entityState = Initial;
</ins><span class="cx">     UChar32 result = 0;
</span><ins>+    bool overflow = false;
+    const UChar32 highestValidCharacter = 0x10FFFF;
</ins><span class="cx">     StringBuilder consumedCharacters;
</span><span class="cx">     
</span><span class="cx">     while (!source.isEmpty()) {
</span><del>-        UChar character = source.currentChar();
-        switch (state) {
-        case Initial:
-            if (character == '\x09' || character == '\x0A' || character == '\x0C' || character == ' ' || character == '&lt;' || character == '&amp;')
</del><ins>+        UChar cc = source.currentChar();
+        switch (entityState) {
+        case Initial: {
+            if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '&lt;' || cc == '&amp;')
</ins><span class="cx">                 return false;
</span><del>-            if (additionalAllowedCharacter &amp;&amp; character == additionalAllowedCharacter)
</del><ins>+            if (additionalAllowedCharacter &amp;&amp; cc == additionalAllowedCharacter)
</ins><span class="cx">                 return false;
</span><del>-            if (character == '#') {
-                state = Number;
</del><ins>+            if (cc == '#') {
+                entityState = Number;
</ins><span class="cx">                 break;
</span><span class="cx">             }
</span><del>-            if (isASCIIAlpha(character)) {
-                state = Named;
-                goto Named;
</del><ins>+            if ((cc &gt;= 'a' &amp;&amp; cc &lt;= 'z') || (cc &gt;= 'A' &amp;&amp; cc &lt;= 'Z')) {
+                entityState = Named;
+                continue;
</ins><span class="cx">             }
</span><span class="cx">             return false;
</span><del>-        case Number:
-            if (character == 'x') {
-                state = MaybeHexLowerCaseX;
</del><ins>+        }
+        case Number: {
+            if (cc == 'x') {
+                entityState = MaybeHexLowerCaseX;
</ins><span class="cx">                 break;
</span><span class="cx">             }
</span><del>-            if (character == 'X') {
-                state = MaybeHexUpperCaseX;
</del><ins>+            if (cc == 'X') {
+                entityState = MaybeHexUpperCaseX;
</ins><span class="cx">                 break;
</span><span class="cx">             }
</span><del>-            if (isASCIIDigit(character)) {
-                state = Decimal;
-                goto Decimal;
</del><ins>+            if (cc &gt;= '0' &amp;&amp; cc &lt;= '9') {
+                entityState = Decimal;
+                continue;
</ins><span class="cx">             }
</span><span class="cx">             source.push('#');
</span><span class="cx">             return false;
</span><del>-        case MaybeHexLowerCaseX:
-            if (isASCIIHexDigit(character)) {
-                state = Hex;
-                goto Hex;
</del><ins>+        }
+        case MaybeHexLowerCaseX: {
+            if (isHexDigit(cc)) {
+                entityState = Hex;
+                continue;
</ins><span class="cx">             }
</span><span class="cx">             source.push('#');
</span><span class="cx">             source.push('x');
</span><span class="cx">             return false;
</span><del>-        case MaybeHexUpperCaseX:
-            if (isASCIIHexDigit(character)) {
-                state = Hex;
-                goto Hex;
</del><ins>+        }
+        case MaybeHexUpperCaseX: {
+            if (isHexDigit(cc)) {
+                entityState = Hex;
+                continue;
</ins><span class="cx">             }
</span><span class="cx">             source.push('#');
</span><span class="cx">             source.push('X');
</span><span class="cx">             return false;
</span><del>-        case Hex:
-        Hex:
-            if (isASCIIHexDigit(character)) {
-                result = result * 16 + toASCIIHexValue(character);
-                break;
-            }
-            if (character == ';') {
-                source.advance();
-                decodedCharacter.append(ParserFunctions::legalEntityFor(result));
</del><ins>+        }
+        case Hex: {
+            if (cc &gt;= '0' &amp;&amp; cc &lt;= '9')
+                result = result * 16 + cc - '0';
+            else if (cc &gt;= 'a' &amp;&amp; cc &lt;= 'f')
+                result = result * 16 + 10 + cc - 'a';
+            else if (cc &gt;= 'A' &amp;&amp; cc &lt;= 'F')
+                result = result * 16 + 10 + cc - 'A';
+            else if (cc == ';') {
+                source.advanceAndASSERT(cc);
+                decodedCharacter.append(ParserFunctions::legalEntityFor(overflow ? 0 : result));
</ins><span class="cx">                 return true;
</span><del>-            }
-            if (ParserFunctions::acceptMalformed()) {
-                decodedCharacter.append(ParserFunctions::legalEntityFor(result));
</del><ins>+            } else if (ParserFunctions::acceptMalformed()) {
+                decodedCharacter.append(ParserFunctions::legalEntityFor(overflow ? 0 : result));
</ins><span class="cx">                 return true;
</span><ins>+            } else {
+                unconsumeCharacters(source, consumedCharacters);
+                return false;
</ins><span class="cx">             }
</span><del>-            unconsumeCharacters(source, consumedCharacters);
-            return false;
-        case Decimal:
-        Decimal:
-            if (isASCIIDigit(character)) {
-                // FIXME: What about overflow?
-                result = result * 10 + character - '0';
-                break;
-            }
-            if (character == ';') {
-                source.advance();
-                decodedCharacter.append(ParserFunctions::legalEntityFor(result));
</del><ins>+            if (result &gt; highestValidCharacter)
+                overflow = true;
+            break;
+        }
+        case Decimal: {
+            if (cc &gt;= '0' &amp;&amp; cc &lt;= '9')
+                result = result * 10 + cc - '0';
+            else if (cc == ';') {
+                source.advanceAndASSERT(cc);
+                decodedCharacter.append(ParserFunctions::legalEntityFor(overflow ? 0 : result));
</ins><span class="cx">                 return true;
</span><ins>+            } else if (ParserFunctions::acceptMalformed()) {
+                decodedCharacter.append(ParserFunctions::legalEntityFor(overflow ? 0 : result));
+                return true;
+            } else {
+                unconsumeCharacters(source, consumedCharacters);
+                return false;
</ins><span class="cx">             }
</span><del>-            if (ParserFunctions::acceptMalformed())
-                decodedCharacter.append(ParserFunctions::legalEntityFor(result));
-            unconsumeCharacters(source, consumedCharacters);
-            return false;
-        case Named:
-        Named:
-            return ParserFunctions::consumeNamedEntity(source, decodedCharacter, notEnoughCharacters, additionalAllowedCharacter, character);
</del><ins>+            if (result &gt; highestValidCharacter)
+                overflow = true;
+            break;
</ins><span class="cx">         }
</span><del>-        consumedCharacters.append(character);
-        source.advance();
</del><ins>+        case Named: {
+            return ParserFunctions::consumeNamedEntity(source, decodedCharacter, notEnoughCharacters, additionalAllowedCharacter, cc);
+        }
+        }
+        consumedCharacters.append(cc);
+        source.advanceAndASSERT(cc);
</ins><span class="cx">     }
</span><span class="cx">     ASSERT(source.isEmpty());
</span><span class="cx">     notEnoughCharacters = true;
</span></span></pre></div>
<a id="trunkSourceWebCorexmlparserMarkupTokenizerInlinesh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/xml/parser/MarkupTokenizerInlines.h (178172 => 178173)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/xml/parser/MarkupTokenizerInlines.h        2015-01-09 17:16:15 UTC (rev 178172)
+++ trunk/Source/WebCore/xml/parser/MarkupTokenizerInlines.h        2015-01-09 17:44:37 UTC (rev 178173)
</span><span class="lines">@@ -1,5 +1,5 @@
</span><span class="cx"> /*
</span><del>- * Copyright (C) 2008, 2015 Apple Inc. All Rights Reserved.
</del><ins>+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
</ins><span class="cx">  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
</span><span class="cx">  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
</span><span class="cx">  *
</span><span class="lines">@@ -30,61 +30,64 @@
</span><span class="cx"> 
</span><span class="cx"> #include &quot;SegmentedString.h&quot;
</span><span class="cx"> 
</span><del>-#if COMPILER(MSVC)
-// Disable the &quot;unreachable code&quot; warning so we can compile the ASSERT_NOT_REACHED in the END_STATE macro.
-#pragma warning(disable: 4702)
-#endif
-
</del><span class="cx"> namespace WebCore {
</span><span class="cx"> 
</span><del>-inline bool isTokenizerWhitespace(UChar character)
</del><ins>+inline bool isTokenizerWhitespace(UChar cc)
</ins><span class="cx"> {
</span><del>-    return character == ' ' || character == '\x0A' || character == '\x09' || character == '\x0C';
</del><ins>+    return cc == ' ' || cc == '\x0A' || cc == '\x09' || cc == '\x0C';
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-#define BEGIN_STATE(stateName)                                  \
-    case stateName:                                             \
-    stateName: {                                                \
-        const auto currentState = stateName;                    \
-        UNUSED_PARAM(currentState);
</del><ins>+inline void advanceStringAndASSERTIgnoringCase(SegmentedString&amp; source, const char* expectedCharacters)
+{
+    while (*expectedCharacters)
+        source.advanceAndASSERTIgnoringCase(*expectedCharacters++);
+}
</ins><span class="cx"> 
</span><del>-#define END_STATE()                                             \
-        ASSERT_NOT_REACHED();                                   \
-        break;                                                  \
-    }
</del><ins>+inline void advanceStringAndASSERT(SegmentedString&amp; source, const char* expectedCharacters)
+{
+    while (*expectedCharacters)
+        source.advanceAndASSERT(*expectedCharacters++);
+}
</ins><span class="cx"> 
</span><del>-#define RETURN_IN_CURRENT_STATE(expression)                     \
-    do {                                                        \
-        m_state = currentState;                                 \
-        return expression;                                      \
-    } while (false)
</del><ins>+#if COMPILER(MSVC)
+// We need to disable the &quot;unreachable code&quot; warning because we want to assert
+// that some code points aren't reached in the state machine.
+#pragma warning(disable: 4702)
+#endif
</ins><span class="cx"> 
</span><del>-// We use this macro when the HTML spec says &quot;reconsume the current input character in the &lt;mumble&gt; state.&quot;
-#define RECONSUME_IN(newState)                                  \
-    do {                                                        \
-        goto newState;                                          \
</del><ins>+#define BEGIN_STATE(prefix, stateName) case prefix::stateName: stateName:
+#define END_STATE() ASSERT_NOT_REACHED(); break;
+
+// We use this macro when the HTML5 spec says &quot;reconsume the current input
+// character in the &lt;mumble&gt; state.&quot;
+#define RECONSUME_IN(prefix, stateName)                                    \
+    do {                                                                   \
+        m_state = prefix::stateName;                                       \
+        goto stateName;                                                    \
</ins><span class="cx">     } while (false)
</span><span class="cx"> 
</span><del>-// We use this macro when the HTML spec says &quot;consume the next input character ... and switch to the &lt;mumble&gt; state.&quot;
-#define ADVANCE_TO(newState)                                    \
-    do {                                                        \
-        if (!m_preprocessor.advance(source, isNullCharacterSkippingState(newState))) { \
-            m_state = newState;                                 \
-            return haveBufferedCharacterToken();                \
-        }                                                       \
-        character = m_preprocessor.nextInputCharacter();        \
-        goto newState;                                          \
</del><ins>+// We use this macro when the HTML5 spec says &quot;consume the next input
+// character ... and switch to the &lt;mumble&gt; state.&quot;
+#define ADVANCE_TO(prefix, stateName)                                      \
+    do {                                                                   \
+        m_state = prefix::stateName;                                       \
+        if (!m_inputStreamPreprocessor.advance(source))                    \
+            return haveBufferedCharacterToken();                           \
+        cc = m_inputStreamPreprocessor.nextInputCharacter();               \
+        goto stateName;                                                    \
</ins><span class="cx">     } while (false)
</span><span class="cx"> 
</span><del>-// For more complex cases, caller consumes the characters first and then uses this macro.
-#define SWITCH_TO(newState)                                     \
-    do {                                                        \
-        if (!m_preprocessor.peek(source, isNullCharacterSkippingState(newState))) { \
-            m_state = newState;                                 \
-            return haveBufferedCharacterToken();                \
-        }                                                       \
-        character = m_preprocessor.nextInputCharacter();        \
-        goto newState;                                          \
</del><ins>+// Sometimes there's more complicated logic in the spec that separates when
+// we consume the next input character and when we switch to a particular
+// state. We handle those cases by advancing the source directly and using
+// this macro to switch to the indicated state.
+#define SWITCH_TO(prefix, stateName)                                       \
+    do {                                                                   \
+        m_state = prefix::stateName;                                       \
+        if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))   \
+            return haveBufferedCharacterToken();                           \
+        cc = m_inputStreamPreprocessor.nextInputCharacter();               \
+        goto stateName;                                                    \
</ins><span class="cx">     } while (false)
</span><span class="cx"> 
</span><span class="cx"> }
</span></span></pre>
</div>
</div>

</body>
</html>