<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[183818] trunk</title>
</head>
<body>

<style type="text/css"><!--
/* Stylesheet for the commit notification: metadata box, log message and coloured diff.
   Rule order is significant (later rules win at equal specificity); do not reorder. */

/* Revision / author / date box at the top of the message. */
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
/* Append a colon after every definition term inside the message. */
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }
/* Links inside the metadata box, tinted to stay readable on the blue background. */
#msg dl a { font-weight: bold}
#msg dl a:link    { color:#fc3; }
#msg dl a:active  { color:#ff0; }
#msg dl a:visited { color:#cc6; }
/* Section headings; this selector is not scoped to a container. */
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
/* Preformatted log message. */
#msg pre { overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }

/* Rules for a rich-text log message container with id "logmsg".
   No element in this message carries that id, so these rules are inert here. */
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
/* Negative text-indent plus equal padding produces a hanging indent for list items. */
#logmsg ul { text-indent: -1em; padding-left: 1em; }
#logmsg ol { text-indent: -1.5em; padding-left: 1.5em; }
#logmsg > ul, #logmsg > ol { margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
/* Prefix each definition with a right-pointing double angle quotation mark (U+00BB). */
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }

/* Optional header / footer banners; no such elements appear in this message. */
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }

/* Diff section: one bordered box per changed file, with a blue title bar. */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
/* Every diff line fragment is rendered as a full-width block. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
/* Added lines on a green tint, removed lines on a red tint; the default underline and strike-through are suppressed. */
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* File header lines (.info) and hunk markers (.lines) in grey on white.
   Both selectors are scoped to #patch so the rule cannot leak onto unrelated elements. */
#patch .lines, #patch .info {color:#888;background:#fff;}
--></style>
<div id="msg">
<dl class="meta">
<dt>Revision</dt> <dd><a href="http://trac.webkit.org/projects/webkit/changeset/183818">183818</a></dd>
<dt>Author</dt> <dd>achristensen@apple.com</dd>
<dt>Date</dt> <dd>2015-05-05 10:27:41 -0700 (Tue, 05 May 2015)</dd>
</dl>

<h3>Log Message</h3>
<pre>[Content Extensions] Combine NFAs properly and free memory as we compile.
https://bugs.webkit.org/show_bug.cgi?id=144485

Reviewed by Benjamin Poulain.

Source/WebCore:

This patch correctly combines all regular expressions with a common prefix up to
the last quantified term into the same NFA.  It also deletes the prefix tree as it 
creates NFAs, thus reducing the maximum memory used when compiling.

* contentextensions/CombinedURLFilters.cpp:
(WebCore::ContentExtensions::CombinedURLFilters::isEmpty):
(WebCore::ContentExtensions::CombinedURLFilters::addPattern):
(WebCore::ContentExtensions::generateNFAForSubtree):
(WebCore::ContentExtensions::CombinedURLFilters::processNFAs):
(WebCore::ContentExtensions::CombinedURLFilters::clear): Deleted.
* contentextensions/CombinedURLFilters.h:
* contentextensions/ContentExtensionCompiler.cpp:
(WebCore::ContentExtensions::compileRuleList):
* contentextensions/ContentExtensionsDebugging.h:

Tools:

* TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:
(TestWebKitAPI::TEST_F):
Added tests for correctly splitting up NFAs with unquantified terms after quantified terms.
Added tests for deep NFAs.</pre>

<h3>Modified Paths</h3>
<ul>
<li><a href="#trunkSourceWebCoreChangeLog">trunk/Source/WebCore/ChangeLog</a></li>
<li><a href="#trunkSourceWebCorecontentextensionsCombinedURLFilterscpp">trunk/Source/WebCore/contentextensions/CombinedURLFilters.cpp</a></li>
<li><a href="#trunkSourceWebCorecontentextensionsCombinedURLFiltersh">trunk/Source/WebCore/contentextensions/CombinedURLFilters.h</a></li>
<li><a href="#trunkSourceWebCorecontentextensionsContentExtensionCompilercpp">trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp</a></li>
<li><a href="#trunkSourceWebCorecontentextensionsContentExtensionsDebuggingh">trunk/Source/WebCore/contentextensions/ContentExtensionsDebugging.h</a></li>
<li><a href="#trunkToolsChangeLog">trunk/Tools/ChangeLog</a></li>
<li><a href="#trunkToolsTestWebKitAPITestsWebCoreContentExtensionscpp">trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp</a></li>
</ul>

</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkSourceWebCoreChangeLog"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/ChangeLog (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/ChangeLog        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Source/WebCore/ChangeLog        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -1,3 +1,25 @@
</span><ins>+2015-05-05  Alex Christensen  &lt;achristensen@webkit.org&gt;
+
+        [Content Extensions] Combine NFAs properly and free memory as we compile.
+        https://bugs.webkit.org/show_bug.cgi?id=144485
+
+        Reviewed by Benjamin Poulain.
+
+        This patch correctly combines all regular expressions with a common prefix up to
+        the last quantified term into the same NFA.  It also deletes the prefix tree as it 
+        creates NFAs, thus reducing the maximum memory used when compiling.
+
+        * contentextensions/CombinedURLFilters.cpp:
+        (WebCore::ContentExtensions::CombinedURLFilters::isEmpty):
+        (WebCore::ContentExtensions::CombinedURLFilters::addPattern):
+        (WebCore::ContentExtensions::generateNFAForSubtree):
+        (WebCore::ContentExtensions::CombinedURLFilters::processNFAs):
+        (WebCore::ContentExtensions::CombinedURLFilters::clear): Deleted.
+        * contentextensions/CombinedURLFilters.h:
+        * contentextensions/ContentExtensionCompiler.cpp:
+        (WebCore::ContentExtensions::compileRuleList):
+        * contentextensions/ContentExtensionsDebugging.h:
+
</ins><span class="cx"> 2015-05-04  Alex Christensen  &lt;achristensen@webkit.org&gt;
</span><span class="cx"> 
</span><span class="cx">         [Content Extensions] Use less memory when writing byte code to file
</span></span></pre></div>
<a id="trunkSourceWebCorecontentextensionsCombinedURLFilterscpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/contentextensions/CombinedURLFilters.cpp (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/contentextensions/CombinedURLFilters.cpp        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Source/WebCore/contentextensions/CombinedURLFilters.cpp        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -47,7 +47,6 @@
</span><span class="cx"> struct PrefixTreeVertex {
</span><span class="cx">     PrefixTreeEdges edges;
</span><span class="cx">     ActionList finalActions;
</span><del>-    bool inVariableLengthPrefix { false };
</del><span class="cx"> };
</span><span class="cx"> 
</span><span class="cx"> #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
</span><span class="lines">@@ -116,9 +115,9 @@
</span><span class="cx"> {
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-void CombinedURLFilters::clear()
</del><ins>+bool CombinedURLFilters::isEmpty()
</ins><span class="cx"> {
</span><del>-    m_prefixTreeRoot = std::make_unique&lt;PrefixTreeVertex&gt;();
</del><ins>+    return m_prefixTreeRoot-&gt;edges.isEmpty();
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> void CombinedURLFilters::addPattern(uint64_t actionId, const Vector&lt;Term&gt;&amp; pattern)
</span><span class="lines">@@ -128,13 +127,8 @@
</span><span class="cx">     if (pattern.isEmpty())
</span><span class="cx">         return;
</span><span class="cx"> 
</span><del>-    Vector&lt;PrefixTreeVertex*, 128&gt; prefixTreeVerticesForPattern;
-    prefixTreeVerticesForPattern.reserveInitialCapacity(pattern.size() + 1);
-
</del><span class="cx">     // Extend the prefix tree with the new pattern.
</span><del>-    bool hasNewTerm = false;
</del><span class="cx">     PrefixTreeVertex* lastPrefixTree = m_prefixTreeRoot.get();
</span><del>-    prefixTreeVerticesForPattern.append(lastPrefixTree);
</del><span class="cx"> 
</span><span class="cx">     for (const Term&amp; term : pattern) {
</span><span class="cx">         size_t nextEntryIndex = WTF::notFound;
</span><span class="lines">@@ -147,129 +141,125 @@
</span><span class="cx">         if (nextEntryIndex != WTF::notFound)
</span><span class="cx">             lastPrefixTree = lastPrefixTree-&gt;edges[nextEntryIndex].child.get();
</span><span class="cx">         else {
</span><del>-            hasNewTerm = true;
-
</del><span class="cx">             lastPrefixTree-&gt;edges.append(PrefixTreeEdge({term, std::make_unique&lt;PrefixTreeVertex&gt;()}));
</span><span class="cx">             lastPrefixTree = lastPrefixTree-&gt;edges.last().child.get();
</span><span class="cx">         }
</span><del>-        prefixTreeVerticesForPattern.append(lastPrefixTree);
</del><span class="cx">     }
</span><span class="cx"> 
</span><del>-    ActionList&amp; actions = prefixTreeVerticesForPattern.last()-&gt;finalActions;
</del><ins>+    ActionList&amp; actions = lastPrefixTree-&gt;finalActions;
</ins><span class="cx">     if (actions.find(actionId) == WTF::notFound)
</span><span class="cx">         actions.append(actionId);
</span><del>-
-    if (!hasNewTerm)
-        return;
-
-    bool hasSeenVariableLengthTerms = false;
-    for (unsigned i = pattern.size(); i--;) {
-        const Term&amp; term = pattern[i];
-        hasSeenVariableLengthTerms |= !term.hasFixedLength();
-        prefixTreeVerticesForPattern[i + 1]-&gt;inVariableLengthPrefix |= hasSeenVariableLengthTerms;
-    }
-    prefixTreeVerticesForPattern[0]-&gt;inVariableLengthPrefix |= hasSeenVariableLengthTerms;
</del><span class="cx"> }
</span><span class="cx"> 
</span><del>-struct ActiveSubtree {
-    const PrefixTreeVertex* vertex;
-    PrefixTreeEdges::const_iterator iterator;
-};
-
-static void generateNFAForSubtree(NFA&amp; nfa, unsigned rootId, const PrefixTreeVertex&amp; prefixTreeVertex)
</del><ins>+static void generateNFAForSubtree(NFA&amp; nfa, unsigned nfaRootId, PrefixTreeVertex&amp; root)
</ins><span class="cx"> {
</span><del>-    ASSERT_WITH_MESSAGE(!prefixTreeVertex.inVariableLengthPrefix, &quot;This code assumes the subtrees with variable prefix length have already been handled.&quot;);
-
-    struct ActiveNFASubtree : ActiveSubtree {
-        ActiveNFASubtree(const PrefixTreeVertex* vertex, PrefixTreeEdges::const_iterator iterator, unsigned nodeIndex)
-            : ActiveSubtree({ vertex, iterator })
-            , lastNodeIndex(nodeIndex)
</del><ins>+    // This recurses the subtree of the prefix tree.
+    // For each edge that has fixed length (no quantifiers like ?, *, or +) it generates the nfa graph,
+    // recurses into children, and deletes any processed leaf nodes.
+    struct ActiveSubtree {
+        ActiveSubtree(PrefixTreeVertex&amp; vertex, unsigned nfaNodeId, unsigned edgeIndex)
+            : vertex(vertex)
+            , nfaNodeId(nfaNodeId)
+            , edgeIndex(edgeIndex)
</ins><span class="cx">         {
</span><span class="cx">         }
</span><del>-        unsigned lastNodeIndex;
</del><ins>+        PrefixTreeVertex&amp; vertex;
+        unsigned nfaNodeId;
+        unsigned edgeIndex;
</ins><span class="cx">     };
</span><ins>+    Vector&lt;ActiveSubtree&gt; stack;
+    if (!root.edges.isEmpty())
+        stack.append(ActiveSubtree(root, nfaRootId, 0));
+    
+    // Generate graphs for each subtree that does not contain any quantifiers.
+    while (!stack.isEmpty()) {
+        PrefixTreeVertex&amp; vertex = stack.last().vertex;
+        const unsigned edgeIndex = stack.last().edgeIndex;
</ins><span class="cx"> 
</span><del>-    Vector&lt;ActiveNFASubtree&gt; activeStack;
-    activeStack.append(ActiveNFASubtree(&amp;prefixTreeVertex, prefixTreeVertex.edges.begin(), rootId));
-
-    while (true) {
-    ProcessSubtree:
-        for (ActiveNFASubtree&amp; activeSubtree = activeStack.last(); activeSubtree.iterator != activeSubtree.vertex-&gt;edges.end(); ++activeSubtree.iterator) {
-            if (activeSubtree.iterator-&gt;child-&gt;inVariableLengthPrefix)
</del><ins>+        if (edgeIndex &lt; vertex.edges.size()) {
+            auto&amp; edge = vertex.edges[edgeIndex];
+            
+            // Quantified edges in the subtree will be a part of another NFA.
+            if (!edge.term.hasFixedLength()) {
+                stack.last().edgeIndex++;
</ins><span class="cx">                 continue;
</span><del>-
-            const Term&amp; term = activeSubtree.iterator-&gt;term;
-            unsigned newEndNodeIndex = term.generateGraph(nfa, activeSubtree.lastNodeIndex, activeSubtree.iterator-&gt;child-&gt;finalActions);
-
-            PrefixTreeVertex* prefixTreeVertex = activeSubtree.iterator-&gt;child.get();
-            if (!prefixTreeVertex-&gt;edges.isEmpty()) {
-                activeStack.append(ActiveNFASubtree(prefixTreeVertex, prefixTreeVertex-&gt;edges.begin(), newEndNodeIndex));
-                goto ProcessSubtree;
</del><span class="cx">             }
</span><ins>+            
+            unsigned subtreeRootId = edge.term.generateGraph(nfa, stack.last().nfaNodeId, edge.child-&gt;finalActions);
+            ASSERT(edge.child.get());
+            stack.append(ActiveSubtree(*edge.child.get(), subtreeRootId, 0));
+        } else {
+            ASSERT(edgeIndex == vertex.edges.size());
+            vertex.edges.removeAllMatching([](PrefixTreeEdge&amp; edge)
+            {
+                return edge.term.isDeletedValue();
+            });
+            stack.removeLast();
+            if (!stack.isEmpty()) {
+                auto&amp; activeSubtree = stack.last();
+                auto&amp; edge = activeSubtree.vertex.edges[stack.last().edgeIndex];
+                if (edge.child-&gt;edges.isEmpty())
+                    edge.term = Term(Term::DeletedValue); // Mark this leaf for deleting.
+                activeSubtree.edgeIndex++;
+            }
</ins><span class="cx">         }
</span><del>-
-        activeStack.removeLast();
-        if (activeStack.isEmpty())
-            break;
-        ++activeStack.last().iterator;
</del><span class="cx">     }
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-void CombinedURLFilters::processNFAs(std::function&lt;void(NFA&amp;&amp;)&gt; handler) const
</del><ins>+void CombinedURLFilters::processNFAs(std::function&lt;void(NFA&amp;&amp;)&gt; handler)
</ins><span class="cx"> {
</span><del>-    Vector&lt;ActiveSubtree&gt; activeStack;
-    activeStack.append(ActiveSubtree({ m_prefixTreeRoot.get(), m_prefixTreeRoot-&gt;edges.begin() }));
-
</del><ins>+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+    print();
+#endif
</ins><span class="cx">     while (true) {
</span><del>-    ProcessSubtree:
-        ActiveSubtree&amp; activeSubtree = activeStack.last();
-
-        // We go depth first into the subtrees with variable prefix. Find the next subtree.
-        for (; activeSubtree.iterator != activeSubtree.vertex-&gt;edges.end(); ++activeSubtree.iterator) {
-            PrefixTreeVertex* prefixTreeVertex = activeSubtree.iterator-&gt;child.get();
-            if (prefixTreeVertex-&gt;inVariableLengthPrefix) {
-                activeStack.append(ActiveSubtree({ prefixTreeVertex, prefixTreeVertex-&gt;edges.begin() }));
-                goto ProcessSubtree;
-            }
</del><ins>+        // Traverse out to a leaf.
+        Vector&lt;PrefixTreeVertex*, 128&gt; stack;
+        PrefixTreeVertex* vertex = m_prefixTreeRoot.get();
+        while (true) {
+            ASSERT(vertex);
+            stack.append(vertex);
+            if (vertex-&gt;edges.isEmpty())
+                break;
+            vertex = vertex-&gt;edges.last().child.get();
</ins><span class="cx">         }
</span><del>-
-        // After we reached here, we know that all the subtrees with variable prefixes have been processed,
-        // time to generate the NFA for the graph rooted here.
-        bool needToGenerate = activeSubtree.vertex-&gt;edges.isEmpty() &amp;&amp; !activeSubtree.vertex-&gt;finalActions.isEmpty();
-        if (!needToGenerate) {
-            for (const auto&amp; edge : activeSubtree.vertex-&gt;edges) {
-                if (!edge.child-&gt;inVariableLengthPrefix) {
-                    needToGenerate = true;
-                    break;
-                }
-            }
</del><ins>+        if (stack.size() == 1)
+            break; // We're done once we have processed and removed all the edges in the prefix tree.
+        
+        // Find the prefix root for this NFA. This is the vertex after the last term with a quantifier if there is one,
+        // or the root if there are no quantifiers left.
+        while (stack.size() &gt; 1) {
+            if (!stack[stack.size() - 2]-&gt;edges.last().term.hasFixedLength())
+                break;
+            stack.removeLast();
</ins><span class="cx">         }
</span><del>-
-        if (needToGenerate) {
-            NFA nfa;
-
-            unsigned prefixEnd = nfa.root();
-
-            for (unsigned i = 0; i &lt; activeStack.size() - 1; ++i) {
-                const Term&amp; term = activeStack[i].iterator-&gt;term;
-                prefixEnd = term.generateGraph(nfa, prefixEnd, activeStack[i].iterator-&gt;child-&gt;finalActions);
-            }
-
-            for (const auto&amp; edge : activeSubtree.vertex-&gt;edges) {
-                if (!edge.child-&gt;inVariableLengthPrefix) {
-                    unsigned newSubtreeStart = edge.term.generateGraph(nfa, prefixEnd, edge.child-&gt;finalActions);
-                    generateNFAForSubtree(nfa, newSubtreeStart, *edge.child);
-                }
-            }
-            
-            handler(WTF::move(nfa));
</del><ins>+        ASSERT_WITH_MESSAGE(!stack.isEmpty(), &quot;At least the root should be in the stack&quot;);
+        
+        // Make an NFA with the subtrees for whom this is also the last quantifier (or who also have no quantifier).
+        NFA nfa;
+        // Put the prefix into the NFA.
+        unsigned prefixEnd = nfa.root();
+        for (unsigned i = 0; i &lt; stack.size() - 1; ++i) {
+            ASSERT(!stack[i]-&gt;edges.isEmpty());
+            const PrefixTreeEdge&amp; edge = stack[i]-&gt;edges.last();
+            prefixEnd = edge.term.generateGraph(nfa, prefixEnd, edge.child-&gt;finalActions);
</ins><span class="cx">         }
</span><del>-
-        // We have processed all the subtrees of this level, pop the stack and move on to the next sibling.
-        activeStack.removeLast();
-        if (activeStack.isEmpty())
-            break;
-        ++activeStack.last().iterator;
</del><ins>+        // Put the non-quantified vertices in the subtree into the NFA and delete them.
+        ASSERT(stack.last());
+        generateNFAForSubtree(nfa, prefixEnd, *stack.last());
+        
+        handler(WTF::move(nfa));
+        
+        // Clean up any processed leaf nodes.
+        while (true) {
+            if (stack.size() &gt; 1) {
+                if (stack[stack.size() - 1]-&gt;edges.isEmpty()) {
+                    stack[stack.size() - 2]-&gt;edges.removeLast();
+                    stack.removeLast();
+                } else
+                    break; // Vertex is not a leaf.
+            } else
+                break; // Leave the empty root.
+        }
</ins><span class="cx">     }
</span><span class="cx"> }
</span><span class="cx"> 
</span></span></pre></div>
<a id="trunkSourceWebCorecontentextensionsCombinedURLFiltersh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/contentextensions/CombinedURLFilters.h (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/contentextensions/CombinedURLFilters.h        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Source/WebCore/contentextensions/CombinedURLFilters.h        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -46,8 +46,8 @@
</span><span class="cx"> 
</span><span class="cx">     void addPattern(uint64_t patternId, const Vector&lt;Term&gt;&amp; pattern);
</span><span class="cx"> 
</span><del>-    void processNFAs(std::function&lt;void(NFA&amp;&amp;)&gt; handler) const;
-    void clear();
</del><ins>+    void processNFAs(std::function&lt;void(NFA&amp;&amp;)&gt; handler);
+    bool isEmpty();
</ins><span class="cx"> 
</span><span class="cx"> #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
</span><span class="cx">     size_t memoryUsed() const;
</span></span></pre></div>
<a id="trunkSourceWebCorecontentextensionsContentExtensionCompilercpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -198,6 +198,7 @@
</span><span class="cx"> #endif
</span><span class="cx"> 
</span><span class="cx">     bool firstNFASeen = false;
</span><ins>+    // FIXME: Combine small NFAs to reduce the number of NFAs.
</ins><span class="cx">     combinedURLFilters.processNFAs([&amp;](NFA&amp;&amp; nfa) {
</span><span class="cx"> #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
</span><span class="cx">         nfa.debugPrintDot();
</span><span class="lines">@@ -244,6 +245,7 @@
</span><span class="cx"> 
</span><span class="cx">         firstNFASeen = true;
</span><span class="cx">     });
</span><ins>+    ASSERT(combinedURLFilters.isEmpty());
</ins><span class="cx"> 
</span><span class="cx">     if (!firstNFASeen) {
</span><span class="cx">         // Our bytecode interpreter expects to have at least one DFA, so if we haven't seen any
</span><span class="lines">@@ -261,9 +263,6 @@
</span><span class="cx">         client.writeBytecode(WTF::move(bytecode));
</span><span class="cx">     }
</span><span class="cx"> 
</span><del>-    // FIXME: combinedURLFilters should be cleared incrementally as it is processing NFAs. 
-    combinedURLFilters.clear();
-
</del><span class="cx">     LOG_LARGE_STRUCTURES(universalActionLocations, universalActionLocations.capacity() * sizeof(unsigned));
</span><span class="cx">     universalActionLocations.clear();
</span><span class="cx"> 
</span></span></pre></div>
<a id="trunkSourceWebCorecontentextensionsContentExtensionsDebuggingh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/contentextensions/ContentExtensionsDebugging.h (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/contentextensions/ContentExtensionsDebugging.h        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Source/WebCore/contentextensions/ContentExtensionsDebugging.h        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -36,7 +36,7 @@
</span><span class="cx"> #define CONTENT_EXTENSIONS_PAGE_SIZE 16384
</span><span class="cx"> 
</span><span class="cx"> #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
</span><del>-#define LOG_LARGE_STRUCTURES(name, size) if (size &gt; 1000000) { dataLogF(&quot;NAME: %s SIZE %d&quot;, #name, (int)(size)); };
</del><ins>+#define LOG_LARGE_STRUCTURES(name, size) if (size &gt; 1000000) { dataLogF(&quot;NAME: %s SIZE %d\n&quot;, #name, (int)(size)); };
</ins><span class="cx"> #else
</span><span class="cx"> #define LOG_LARGE_STRUCTURES(name, size)
</span><span class="cx"> #endif
</span></span></pre></div>
<a id="trunkToolsChangeLog"></a>
<div class="modfile"><h4>Modified: trunk/Tools/ChangeLog (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Tools/ChangeLog        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Tools/ChangeLog        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -1,3 +1,15 @@
</span><ins>+2015-05-05  Alex Christensen  &lt;achristensen@webkit.org&gt;
+
+        [Content Extensions] Combine NFAs properly and free memory as we compile.
+        https://bugs.webkit.org/show_bug.cgi?id=144485
+
+        Reviewed by Benjamin Poulain.
+
+        * TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:
+        (TestWebKitAPI::TEST_F):
+        Added tests for correctly splitting up NFAs with unquantified terms after quantified terms.
+        Added tests for deep NFAs.
+
</ins><span class="cx"> 2015-05-04  Alex Christensen  &lt;achristensen@webkit.org&gt;
</span><span class="cx"> 
</span><span class="cx">         [Content Extensions] Use less memory when writing byte code to file
</span></span></pre></div>
<a id="trunkToolsTestWebKitAPITestsWebCoreContentExtensionscpp"></a>
<div class="modfile"><h4>Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -505,10 +505,9 @@
</span><span class="cx">     testRequest(backend, {URL(URL(), &quot;http://webkit.org&quot;), URL(URL(), &quot;http://not_webkit.org&quot;), ResourceType::Image}, { ContentExtensions::ActionType::BlockCookies, ContentExtensions::ActionType::BlockLoad });
</span><span class="cx"> }
</span><span class="cx">     
</span><del>-TEST_F(ContentExtensionTest, MultiDFA)
</del><ins>+TEST_F(ContentExtensionTest, WideNFA)
</ins><span class="cx"> {
</span><span class="cx">     // Make an NFA with about 1400 nodes.
</span><del>-    // FIXME: This does not make multiple DFAs anymore. Add a test that does.
</del><span class="cx">     StringBuilder ruleList;
</span><span class="cx">     ruleList.append('[');
</span><span class="cx">     for (char c1 = 'A'; c1 &lt;= 'Z'; ++c1) {
</span><span class="lines">@@ -543,6 +542,37 @@
</span><span class="cx">     testRequest(backend, mainDocumentRequest(&quot;http://webkit.org/&quot;), { });
</span><span class="cx"> }
</span><span class="cx"> 
</span><ins>+TEST_F(ContentExtensionTest, DeepNFA)
+{
+    const unsigned size = 100000;
+    
+    ContentExtensions::CombinedURLFilters combinedURLFilters;
+    ContentExtensions::URLFilterParser parser(combinedURLFilters);
+    
+    // FIXME: NFAToDFA::convert takes way too long on these deep NFAs. We should optimize for that case.
+    
+    StringBuilder lotsOfAs;
+    for (unsigned i = 0; i &lt; size; ++i)
+        lotsOfAs.append('A');
+    EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern(lotsOfAs.toString().utf8().data(), false, 0));
+    
+    // FIXME: Yarr ought to be able to handle 2MB regular expressions.
+    StringBuilder tooManyAs;
+    for (unsigned i = 0; i &lt; size * 20; ++i)
+        tooManyAs.append('A');
+    EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::YarrError, parser.addPattern(tooManyAs.toString().utf8().data(), false, 0));
+    
+    StringBuilder nestedGroups;
+    for (unsigned i = 0; i &lt; size; ++i)
+        nestedGroups.append('(');
+    for (unsigned i = 0; i &lt; size; ++i)
+        nestedGroups.append(&quot;B)&quot;);
+    // FIXME: Add nestedGroups. Right now it also takes too long. It should be optimized.
+    
+    // This should not crash and not timeout.
+    EXPECT_EQ(1ul, createNFAs(combinedURLFilters).size());
+}
+
</ins><span class="cx"> void checkCompilerError(const char* json, ContentExtensions::ContentExtensionError expectedError)
</span><span class="cx"> {
</span><span class="cx">     WebCore::ContentExtensions::CompiledContentExtensionData extensionData;
</span><span class="lines">@@ -626,9 +656,9 @@
</span><span class="cx"> TEST_F(ContentExtensionTest, StrictPrefixSeparatedMachines2)
</span><span class="cx"> {
</span><span class="cx">     auto backend = makeBackend(&quot;[{\&quot;action\&quot;:{\&quot;type\&quot;:\&quot;block\&quot;},\&quot;trigger\&quot;:{\&quot;url-filter\&quot;:\&quot;^foo\&quot;}},&quot;
</span><del>-    &quot;{\&quot;action\&quot;:{\&quot;type\&quot;:\&quot;block\&quot;},\&quot;trigger\&quot;:{\&quot;url-filter\&quot;:\&quot;^.*[a-c]+bar\&quot;}},&quot;
-    &quot;{\&quot;action\&quot;:{\&quot;type\&quot;:\&quot;block\&quot;},\&quot;trigger\&quot;:{\&quot;url-filter\&quot;:\&quot;^webkit:\&quot;}},&quot;
-    &quot;{\&quot;action\&quot;:{\&quot;type\&quot;:\&quot;block\&quot;},\&quot;trigger\&quot;:{\&quot;url-filter\&quot;:\&quot;[a-c]+b+oom\&quot;}}]&quot;);
</del><ins>+        &quot;{\&quot;action\&quot;:{\&quot;type\&quot;:\&quot;block\&quot;},\&quot;trigger\&quot;:{\&quot;url-filter\&quot;:\&quot;^.*[a-c]+bar\&quot;}},&quot;
+        &quot;{\&quot;action\&quot;:{\&quot;type\&quot;:\&quot;block\&quot;},\&quot;trigger\&quot;:{\&quot;url-filter\&quot;:\&quot;^webkit:\&quot;}},&quot;
+        &quot;{\&quot;action\&quot;:{\&quot;type\&quot;:\&quot;block\&quot;},\&quot;trigger\&quot;:{\&quot;url-filter\&quot;:\&quot;[a-c]+b+oom\&quot;}}]&quot;);
</ins><span class="cx"> 
</span><span class="cx">     testRequest(backend, mainDocumentRequest(&quot;http://webkit.org/&quot;), { });
</span><span class="cx">     testRequest(backend, mainDocumentRequest(&quot;foo://webkit.org/&quot;), { ContentExtensions::ActionType::BlockLoad });
</span><span class="lines">@@ -656,6 +686,52 @@
</span><span class="cx">     EXPECT_EQ(3ul, createNFAs(combinedURLFilters).size());
</span><span class="cx"> }
</span><span class="cx"> 
</span><ins>+TEST_F(ContentExtensionTest, StrictPrefixSeparatedMachines3)
+{
+    auto backend = makeBackend(&quot;[{\&quot;action\&quot;:{\&quot;type\&quot;:\&quot;block\&quot;},\&quot;trigger\&quot;:{\&quot;url-filter\&quot;:\&quot;A*D\&quot;}},&quot;
+        &quot;{\&quot;action\&quot;:{\&quot;type\&quot;:\&quot;ignore-previous-rules\&quot;},\&quot;trigger\&quot;:{\&quot;url-filter\&quot;:\&quot;A*BA+\&quot;}},&quot;
+        &quot;{\&quot;action\&quot;:{\&quot;type\&quot;:\&quot;block-cookies\&quot;},\&quot;trigger\&quot;:{\&quot;url-filter\&quot;:\&quot;A*BC\&quot;}}]&quot;);
+    
+    testRequest(backend, mainDocumentRequest(&quot;http://webkit.org/D&quot;), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest(&quot;http://webkit.org/AAD&quot;), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest(&quot;http://webkit.org/AB&quot;), { });
+    testRequest(backend, mainDocumentRequest(&quot;http://webkit.org/ABA&quot;), { }, true);
+    testRequest(backend, mainDocumentRequest(&quot;http://webkit.org/ABAD&quot;), { }, true);
+    testRequest(backend, mainDocumentRequest(&quot;http://webkit.org/BC&quot;), { ContentExtensions::ActionType::BlockCookies });
+    testRequest(backend, mainDocumentRequest(&quot;http://webkit.org/ABC&quot;), { ContentExtensions::ActionType::BlockCookies });
+    testRequest(backend, mainDocumentRequest(&quot;http://webkit.org/ABABC&quot;), { ContentExtensions::ActionType::BlockCookies }, true);
+    testRequest(backend, mainDocumentRequest(&quot;http://webkit.org/ABABCAD&quot;), { ContentExtensions::ActionType::BlockCookies }, true);
+    testRequest(backend, mainDocumentRequest(&quot;http://webkit.org/ABCAD&quot;), { ContentExtensions::ActionType::BlockCookies, ContentExtensions::ActionType::BlockLoad });
+}
+    
+TEST_F(ContentExtensionTest, StrictPrefixSeparatedMachines3Partitioning)
+{
+    ContentExtensions::CombinedURLFilters combinedURLFilters;
+    ContentExtensions::URLFilterParser parser(combinedURLFilters);
+    
+    EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern(&quot;A*D&quot;, false, 0));
+    EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern(&quot;A*BA+&quot;, false, 1));
+    EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern(&quot;A*BC&quot;, false, 2));
+    
+    // &quot;A*D&quot; and &quot;A*BC&quot; can be grouped, &quot;A*BA+&quot; should not.
+    EXPECT_EQ(2ul, createNFAs(combinedURLFilters).size());
+}
+
+TEST_F(ContentExtensionTest, QuantifierInGroup)
+{
+    ContentExtensions::CombinedURLFilters combinedURLFilters;
+    ContentExtensions::URLFilterParser parser(combinedURLFilters);
+    
+    EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern(&quot;(((A+)B)C)&quot;, false, 0));
+    EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern(&quot;(((A)B+)C)&quot;, false, 1));
+    EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern(&quot;(((A)B+)C)D&quot;, false, 2));
+    EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern(&quot;(((A)B)C+)&quot;, false, 3));
+    EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern(&quot;(((A)B)C)&quot;, false, 4));
+    
+    // (((A)B+)C) and (((A)B+)C)D should be in the same NFA.
+    EXPECT_EQ(4ul, createNFAs(combinedURLFilters).size());
+}
+
</ins><span class="cx"> static void testPatternStatus(String pattern, ContentExtensions::URLFilterParser::ParseStatus status)
</span><span class="cx"> {
</span><span class="cx">     ContentExtensions::CombinedURLFilters combinedURLFilters;
</span></span></pre>
</div>
</div>

</body>
</html>