<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[183818] trunk</title>
</head>
<body>
<style type="text/css"><!--
/* Commit metadata box (revision, author, date) at the top of the message. */
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
#msg dl a { font-weight: bold}
#msg dl a:link { color:#fc3; }
#msg dl a:active { color:#ff0; }
#msg dl a:visited { color:#cc6; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }
/* Formatted log message body (#logmsg) and the elements it may contain. */
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
/* Negative text-indent plus matching padding gives list items a hanging indent. */
#logmsg ul { text-indent: -1em; padding-left: 1em; }
#logmsg ol { text-indent: -1.5em; padding-left: 1.5em; }
#logmsg > ul, #logmsg > ol { margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }
/* Optional header and footer banners. */
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
/* Diff section: one bordered box per changed file, with a heading and a pre.diff body. */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff {padding:10px 0;}
/* Every diff run (context span, ins, del) is a full-width block so its background spans the line. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* Hunk markers (.lines) and file header lines (.info); both selectors are scoped to the diff section. */
#patch .lines, #patch .info {color:#888;background:#fff;}
--></style>
<div id="msg">
<dl class="meta">
<dt>Revision</dt> <dd><a href="http://trac.webkit.org/projects/webkit/changeset/183818">183818</a></dd>
<dt>Author</dt> <dd>achristensen@apple.com</dd>
<dt>Date</dt> <dd>2015-05-05 10:27:41 -0700 (Tue, 05 May 2015)</dd>
</dl>
<h3>Log Message</h3>
<pre>[Content Extensions] Combine NFAs properly and free memory as we compile.
https://bugs.webkit.org/show_bug.cgi?id=144485
Reviewed by Benjamin Poulain.
Source/WebCore:
This patch correctly combines all regular expressions with a common prefix up to
the last quantified term into the same NFA. It also deletes the prefix tree as it
creates NFAs, thus reducing the maximum memory used when compiling.
* contentextensions/CombinedURLFilters.cpp:
(WebCore::ContentExtensions::CombinedURLFilters::isEmpty):
(WebCore::ContentExtensions::CombinedURLFilters::addPattern):
(WebCore::ContentExtensions::generateNFAForSubtree):
(WebCore::ContentExtensions::CombinedURLFilters::processNFAs):
(WebCore::ContentExtensions::CombinedURLFilters::clear): Deleted.
* contentextensions/CombinedURLFilters.h:
* contentextensions/ContentExtensionCompiler.cpp:
(WebCore::ContentExtensions::compileRuleList):
* contentextensions/ContentExtensionsDebugging.h:
Tools:
* TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:
(TestWebKitAPI::TEST_F):
Added tests for correctly splitting up NFAs with unquantified terms after quantified terms.
Added tests for deep NFAs.</pre>
<h3>Modified Paths</h3>
<ul>
<li><a href="#trunkSourceWebCoreChangeLog">trunk/Source/WebCore/ChangeLog</a></li>
<li><a href="#trunkSourceWebCorecontentextensionsCombinedURLFilterscpp">trunk/Source/WebCore/contentextensions/CombinedURLFilters.cpp</a></li>
<li><a href="#trunkSourceWebCorecontentextensionsCombinedURLFiltersh">trunk/Source/WebCore/contentextensions/CombinedURLFilters.h</a></li>
<li><a href="#trunkSourceWebCorecontentextensionsContentExtensionCompilercpp">trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp</a></li>
<li><a href="#trunkSourceWebCorecontentextensionsContentExtensionsDebuggingh">trunk/Source/WebCore/contentextensions/ContentExtensionsDebugging.h</a></li>
<li><a href="#trunkToolsChangeLog">trunk/Tools/ChangeLog</a></li>
<li><a href="#trunkToolsTestWebKitAPITestsWebCoreContentExtensionscpp">trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkSourceWebCoreChangeLog"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/ChangeLog (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/ChangeLog        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Source/WebCore/ChangeLog        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -1,3 +1,25 @@
</span><ins>+2015-05-05 Alex Christensen &lt;achristensen@webkit.org&gt;
+
+ [Content Extensions] Combine NFAs properly and free memory as we compile.
+ https://bugs.webkit.org/show_bug.cgi?id=144485
+
+ Reviewed by Benjamin Poulain.
+
+ This patch correctly combines all regular expressions with a common prefix up to
+ the last quantified term into the same NFA. It also deletes the prefix tree as it
+ creates NFAs, thus reducing the maximum memory used when compiling.
+
+ * contentextensions/CombinedURLFilters.cpp:
+ (WebCore::ContentExtensions::CombinedURLFilters::isEmpty):
+ (WebCore::ContentExtensions::CombinedURLFilters::addPattern):
+ (WebCore::ContentExtensions::generateNFAForSubtree):
+ (WebCore::ContentExtensions::CombinedURLFilters::processNFAs):
+ (WebCore::ContentExtensions::CombinedURLFilters::clear): Deleted.
+ * contentextensions/CombinedURLFilters.h:
+ * contentextensions/ContentExtensionCompiler.cpp:
+ (WebCore::ContentExtensions::compileRuleList):
+ * contentextensions/ContentExtensionsDebugging.h:
+
</ins><span class="cx"> 2015-05-04 Alex Christensen &lt;achristensen@webkit.org&gt;
</span><span class="cx">
</span><span class="cx"> [Content Extensions] Use less memory when writing byte code to file
</span></span></pre></div>
<a id="trunkSourceWebCorecontentextensionsCombinedURLFilterscpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/contentextensions/CombinedURLFilters.cpp (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/contentextensions/CombinedURLFilters.cpp        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Source/WebCore/contentextensions/CombinedURLFilters.cpp        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -47,7 +47,6 @@
</span><span class="cx"> struct PrefixTreeVertex {
</span><span class="cx"> PrefixTreeEdges edges;
</span><span class="cx"> ActionList finalActions;
</span><del>- bool inVariableLengthPrefix { false };
</del><span class="cx"> };
</span><span class="cx">
</span><span class="cx"> #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
</span><span class="lines">@@ -116,9 +115,9 @@
</span><span class="cx"> {
</span><span class="cx"> }
</span><span class="cx">
</span><del>-void CombinedURLFilters::clear()
</del><ins>+bool CombinedURLFilters::isEmpty()
</ins><span class="cx"> {
</span><del>- m_prefixTreeRoot = std::make_unique&lt;PrefixTreeVertex&gt;();
</del><ins>+ return m_prefixTreeRoot->edges.isEmpty();
</ins><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> void CombinedURLFilters::addPattern(uint64_t actionId, const Vector&lt;Term&gt;&amp; pattern)
</span><span class="lines">@@ -128,13 +127,8 @@
</span><span class="cx"> if (pattern.isEmpty())
</span><span class="cx"> return;
</span><span class="cx">
</span><del>- Vector&lt;PrefixTreeVertex*, 128&gt; prefixTreeVerticesForPattern;
- prefixTreeVerticesForPattern.reserveInitialCapacity(pattern.size() + 1);
-
</del><span class="cx"> // Extend the prefix tree with the new pattern.
</span><del>- bool hasNewTerm = false;
</del><span class="cx"> PrefixTreeVertex* lastPrefixTree = m_prefixTreeRoot.get();
</span><del>- prefixTreeVerticesForPattern.append(lastPrefixTree);
</del><span class="cx">
</span><span class="cx"> for (const Term& term : pattern) {
</span><span class="cx"> size_t nextEntryIndex = WTF::notFound;
</span><span class="lines">@@ -147,129 +141,125 @@
</span><span class="cx"> if (nextEntryIndex != WTF::notFound)
</span><span class="cx"> lastPrefixTree = lastPrefixTree->edges[nextEntryIndex].child.get();
</span><span class="cx"> else {
</span><del>- hasNewTerm = true;
-
</del><span class="cx"> lastPrefixTree->edges.append(PrefixTreeEdge({term, std::make_unique&lt;PrefixTreeVertex&gt;()}));
</span><span class="cx"> lastPrefixTree = lastPrefixTree->edges.last().child.get();
</span><span class="cx"> }
</span><del>- prefixTreeVerticesForPattern.append(lastPrefixTree);
</del><span class="cx"> }
</span><span class="cx">
</span><del>- ActionList& actions = prefixTreeVerticesForPattern.last()->finalActions;
</del><ins>+ ActionList& actions = lastPrefixTree->finalActions;
</ins><span class="cx"> if (actions.find(actionId) == WTF::notFound)
</span><span class="cx"> actions.append(actionId);
</span><del>-
- if (!hasNewTerm)
- return;
-
- bool hasSeenVariableLengthTerms = false;
- for (unsigned i = pattern.size(); i--;) {
- const Term& term = pattern[i];
- hasSeenVariableLengthTerms |= !term.hasFixedLength();
- prefixTreeVerticesForPattern[i + 1]->inVariableLengthPrefix |= hasSeenVariableLengthTerms;
- }
- prefixTreeVerticesForPattern[0]->inVariableLengthPrefix |= hasSeenVariableLengthTerms;
</del><span class="cx"> }
</span><span class="cx">
</span><del>-struct ActiveSubtree {
- const PrefixTreeVertex* vertex;
- PrefixTreeEdges::const_iterator iterator;
-};
-
-static void generateNFAForSubtree(NFA& nfa, unsigned rootId, const PrefixTreeVertex& prefixTreeVertex)
</del><ins>+static void generateNFAForSubtree(NFA& nfa, unsigned nfaRootId, PrefixTreeVertex& root)
</ins><span class="cx"> {
</span><del>- ASSERT_WITH_MESSAGE(!prefixTreeVertex.inVariableLengthPrefix, "This code assumes the subtrees with variable prefix length have already been handled.");
-
- struct ActiveNFASubtree : ActiveSubtree {
- ActiveNFASubtree(const PrefixTreeVertex* vertex, PrefixTreeEdges::const_iterator iterator, unsigned nodeIndex)
- : ActiveSubtree({ vertex, iterator })
- , lastNodeIndex(nodeIndex)
</del><ins>+ // This recurses the subtree of the prefix tree.
+ // For each edge that has fixed length (no quantifiers like ?, *, or +) it generates the nfa graph,
+ // recurses into children, and deletes any processed leaf nodes.
+ struct ActiveSubtree {
+ ActiveSubtree(PrefixTreeVertex& vertex, unsigned nfaNodeId, unsigned edgeIndex)
+ : vertex(vertex)
+ , nfaNodeId(nfaNodeId)
+ , edgeIndex(edgeIndex)
</ins><span class="cx"> {
</span><span class="cx"> }
</span><del>- unsigned lastNodeIndex;
</del><ins>+ PrefixTreeVertex& vertex;
+ unsigned nfaNodeId;
+ unsigned edgeIndex;
</ins><span class="cx"> };
</span><ins>+ Vector&lt;ActiveSubtree&gt; stack;
+ if (!root.edges.isEmpty())
+ stack.append(ActiveSubtree(root, nfaRootId, 0));
+
+ // Generate graphs for each subtree that does not contain any quantifiers.
+ while (!stack.isEmpty()) {
+ PrefixTreeVertex& vertex = stack.last().vertex;
+ const unsigned edgeIndex = stack.last().edgeIndex;
</ins><span class="cx">
</span><del>- Vector&lt;ActiveNFASubtree&gt; activeStack;
- activeStack.append(ActiveNFASubtree(&prefixTreeVertex, prefixTreeVertex.edges.begin(), rootId));
-
- while (true) {
- ProcessSubtree:
- for (ActiveNFASubtree& activeSubtree = activeStack.last(); activeSubtree.iterator != activeSubtree.vertex->edges.end(); ++activeSubtree.iterator) {
- if (activeSubtree.iterator->child->inVariableLengthPrefix)
</del><ins>+ if (edgeIndex < vertex.edges.size()) {
+ auto& edge = vertex.edges[edgeIndex];
+
+ // Quantified edges in the subtree will be a part of another NFA.
+ if (!edge.term.hasFixedLength()) {
+ stack.last().edgeIndex++;
</ins><span class="cx"> continue;
</span><del>-
- const Term& term = activeSubtree.iterator->term;
- unsigned newEndNodeIndex = term.generateGraph(nfa, activeSubtree.lastNodeIndex, activeSubtree.iterator->child->finalActions);
-
- PrefixTreeVertex* prefixTreeVertex = activeSubtree.iterator->child.get();
- if (!prefixTreeVertex->edges.isEmpty()) {
- activeStack.append(ActiveNFASubtree(prefixTreeVertex, prefixTreeVertex->edges.begin(), newEndNodeIndex));
- goto ProcessSubtree;
</del><span class="cx"> }
</span><ins>+
+ unsigned subtreeRootId = edge.term.generateGraph(nfa, stack.last().nfaNodeId, edge.child->finalActions);
+ ASSERT(edge.child.get());
+ stack.append(ActiveSubtree(*edge.child.get(), subtreeRootId, 0));
+ } else {
+ ASSERT(edgeIndex == vertex.edges.size());
+ vertex.edges.removeAllMatching([](PrefixTreeEdge& edge)
+ {
+ return edge.term.isDeletedValue();
+ });
+ stack.removeLast();
+ if (!stack.isEmpty()) {
+ auto& activeSubtree = stack.last();
+ auto& edge = activeSubtree.vertex.edges[stack.last().edgeIndex];
+ if (edge.child->edges.isEmpty())
+ edge.term = Term(Term::DeletedValue); // Mark this leaf for deleting.
+ activeSubtree.edgeIndex++;
+ }
</ins><span class="cx"> }
</span><del>-
- activeStack.removeLast();
- if (activeStack.isEmpty())
- break;
- ++activeStack.last().iterator;
</del><span class="cx"> }
</span><span class="cx"> }
</span><span class="cx">
</span><del>-void CombinedURLFilters::processNFAs(std::function&lt;void(NFA&amp;&amp;)&gt; handler) const
</del><ins>+void CombinedURLFilters::processNFAs(std::function&lt;void(NFA&amp;&amp;)&gt; handler)
</ins><span class="cx"> {
</span><del>- Vector&lt;ActiveSubtree&gt; activeStack;
- activeStack.append(ActiveSubtree({ m_prefixTreeRoot.get(), m_prefixTreeRoot->edges.begin() }));
-
</del><ins>+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ print();
+#endif
</ins><span class="cx"> while (true) {
</span><del>- ProcessSubtree:
- ActiveSubtree& activeSubtree = activeStack.last();
-
- // We go depth first into the subtrees with variable prefix. Find the next subtree.
- for (; activeSubtree.iterator != activeSubtree.vertex->edges.end(); ++activeSubtree.iterator) {
- PrefixTreeVertex* prefixTreeVertex = activeSubtree.iterator->child.get();
- if (prefixTreeVertex->inVariableLengthPrefix) {
- activeStack.append(ActiveSubtree({ prefixTreeVertex, prefixTreeVertex->edges.begin() }));
- goto ProcessSubtree;
- }
</del><ins>+ // Traverse out to a leaf.
+ Vector&lt;PrefixTreeVertex*, 128&gt; stack;
+ PrefixTreeVertex* vertex = m_prefixTreeRoot.get();
+ while (true) {
+ ASSERT(vertex);
+ stack.append(vertex);
+ if (vertex->edges.isEmpty())
+ break;
+ vertex = vertex->edges.last().child.get();
</ins><span class="cx"> }
</span><del>-
- // After we reached here, we know that all the subtrees with variable prefixes have been processed,
- // time to generate the NFA for the graph rooted here.
- bool needToGenerate = activeSubtree.vertex->edges.isEmpty() && !activeSubtree.vertex->finalActions.isEmpty();
- if (!needToGenerate) {
- for (const auto& edge : activeSubtree.vertex->edges) {
- if (!edge.child->inVariableLengthPrefix) {
- needToGenerate = true;
- break;
- }
- }
</del><ins>+ if (stack.size() == 1)
+ break; // We're done once we have processed and removed all the edges in the prefix tree.
+
+ // Find the prefix root for this NFA. This is the vertex after the last term with a quantifier if there is one,
+ // or the root if there are no quantifiers left.
+ while (stack.size() > 1) {
+ if (!stack[stack.size() - 2]->edges.last().term.hasFixedLength())
+ break;
+ stack.removeLast();
</ins><span class="cx"> }
</span><del>-
- if (needToGenerate) {
- NFA nfa;
-
- unsigned prefixEnd = nfa.root();
-
- for (unsigned i = 0; i < activeStack.size() - 1; ++i) {
- const Term& term = activeStack[i].iterator->term;
- prefixEnd = term.generateGraph(nfa, prefixEnd, activeStack[i].iterator->child->finalActions);
- }
-
- for (const auto& edge : activeSubtree.vertex->edges) {
- if (!edge.child->inVariableLengthPrefix) {
- unsigned newSubtreeStart = edge.term.generateGraph(nfa, prefixEnd, edge.child->finalActions);
- generateNFAForSubtree(nfa, newSubtreeStart, *edge.child);
- }
- }
-
- handler(WTF::move(nfa));
</del><ins>+ ASSERT_WITH_MESSAGE(!stack.isEmpty(), "At least the root should be in the stack");
+
+ // Make an NFA with the subtrees for whom this is also the last quantifier (or who also have no quantifier).
+ NFA nfa;
+ // Put the prefix into the NFA.
+ unsigned prefixEnd = nfa.root();
+ for (unsigned i = 0; i < stack.size() - 1; ++i) {
+ ASSERT(!stack[i]->edges.isEmpty());
+ const PrefixTreeEdge& edge = stack[i]->edges.last();
+ prefixEnd = edge.term.generateGraph(nfa, prefixEnd, edge.child->finalActions);
</ins><span class="cx"> }
</span><del>-
- // We have processed all the subtrees of this level, pop the stack and move on to the next sibling.
- activeStack.removeLast();
- if (activeStack.isEmpty())
- break;
- ++activeStack.last().iterator;
</del><ins>+ // Put the non-quantified vertices in the subtree into the NFA and delete them.
+ ASSERT(stack.last());
+ generateNFAForSubtree(nfa, prefixEnd, *stack.last());
+
+ handler(WTF::move(nfa));
+
+ // Clean up any processed leaf nodes.
+ while (true) {
+ if (stack.size() > 1) {
+ if (stack[stack.size() - 1]->edges.isEmpty()) {
+ stack[stack.size() - 2]->edges.removeLast();
+ stack.removeLast();
+ } else
+ break; // Vertex is not a leaf.
+ } else
+ break; // Leave the empty root.
+ }
</ins><span class="cx"> }
</span><span class="cx"> }
</span><span class="cx">
</span></span></pre></div>
<a id="trunkSourceWebCorecontentextensionsCombinedURLFiltersh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/contentextensions/CombinedURLFilters.h (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/contentextensions/CombinedURLFilters.h        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Source/WebCore/contentextensions/CombinedURLFilters.h        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -46,8 +46,8 @@
</span><span class="cx">
</span><span class="cx"> void addPattern(uint64_t patternId, const Vector&lt;Term&gt;&amp; pattern);
</span><span class="cx">
</span><del>- void processNFAs(std::function&lt;void(NFA&amp;&amp;)&gt; handler) const;
- void clear();
</del><ins>+ void processNFAs(std::function&lt;void(NFA&amp;&amp;)&gt; handler);
+ bool isEmpty();
</ins><span class="cx">
</span><span class="cx"> #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
</span><span class="cx"> size_t memoryUsed() const;
</span></span></pre></div>
<a id="trunkSourceWebCorecontentextensionsContentExtensionCompilercpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -198,6 +198,7 @@
</span><span class="cx"> #endif
</span><span class="cx">
</span><span class="cx"> bool firstNFASeen = false;
</span><ins>+ // FIXME: Combine small NFAs to reduce the number of NFAs.
</ins><span class="cx"> combinedURLFilters.processNFAs([&](NFA&& nfa) {
</span><span class="cx"> #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
</span><span class="cx"> nfa.debugPrintDot();
</span><span class="lines">@@ -244,6 +245,7 @@
</span><span class="cx">
</span><span class="cx"> firstNFASeen = true;
</span><span class="cx"> });
</span><ins>+ ASSERT(combinedURLFilters.isEmpty());
</ins><span class="cx">
</span><span class="cx"> if (!firstNFASeen) {
</span><span class="cx"> // Our bytecode interpreter expects to have at least one DFA, so if we haven't seen any
</span><span class="lines">@@ -261,9 +263,6 @@
</span><span class="cx"> client.writeBytecode(WTF::move(bytecode));
</span><span class="cx"> }
</span><span class="cx">
</span><del>- // FIXME: combinedURLFilters should be cleared incrementally as it is processing NFAs.
- combinedURLFilters.clear();
-
</del><span class="cx"> LOG_LARGE_STRUCTURES(universalActionLocations, universalActionLocations.capacity() * sizeof(unsigned));
</span><span class="cx"> universalActionLocations.clear();
</span><span class="cx">
</span></span></pre></div>
<a id="trunkSourceWebCorecontentextensionsContentExtensionsDebuggingh"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/contentextensions/ContentExtensionsDebugging.h (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/contentextensions/ContentExtensionsDebugging.h        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Source/WebCore/contentextensions/ContentExtensionsDebugging.h        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -36,7 +36,7 @@
</span><span class="cx"> #define CONTENT_EXTENSIONS_PAGE_SIZE 16384
</span><span class="cx">
</span><span class="cx"> #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
</span><del>-#define LOG_LARGE_STRUCTURES(name, size) if (size > 1000000) { dataLogF("NAME: %s SIZE %d", #name, (int)(size)); };
</del><ins>+#define LOG_LARGE_STRUCTURES(name, size) if (size > 1000000) { dataLogF("NAME: %s SIZE %d\n", #name, (int)(size)); };
</ins><span class="cx"> #else
</span><span class="cx"> #define LOG_LARGE_STRUCTURES(name, size)
</span><span class="cx"> #endif
</span></span></pre></div>
<a id="trunkToolsChangeLog"></a>
<div class="modfile"><h4>Modified: trunk/Tools/ChangeLog (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Tools/ChangeLog        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Tools/ChangeLog        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -1,3 +1,15 @@
</span><ins>+2015-05-05 Alex Christensen &lt;achristensen@webkit.org&gt;
+
+ [Content Extensions] Combine NFAs properly and free memory as we compile.
+ https://bugs.webkit.org/show_bug.cgi?id=144485
+
+ Reviewed by Benjamin Poulain.
+
+ * TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:
+ (TestWebKitAPI::TEST_F):
+ Added tests for correctly splitting up NFAs with unquantified terms after quantified terms.
+ Added tests for deep NFAs.
+
</ins><span class="cx"> 2015-05-04 Alex Christensen &lt;achristensen@webkit.org&gt;
</span><span class="cx">
</span><span class="cx"> [Content Extensions] Use less memory when writing byte code to file
</span></span></pre></div>
<a id="trunkToolsTestWebKitAPITestsWebCoreContentExtensionscpp"></a>
<div class="modfile"><h4>Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp (183817 => 183818)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp        2015-05-05 17:12:36 UTC (rev 183817)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp        2015-05-05 17:27:41 UTC (rev 183818)
</span><span class="lines">@@ -505,10 +505,9 @@
</span><span class="cx"> testRequest(backend, {URL(URL(), "http://webkit.org"), URL(URL(), "http://not_webkit.org"), ResourceType::Image}, { ContentExtensions::ActionType::BlockCookies, ContentExtensions::ActionType::BlockLoad });
</span><span class="cx"> }
</span><span class="cx">
</span><del>-TEST_F(ContentExtensionTest, MultiDFA)
</del><ins>+TEST_F(ContentExtensionTest, WideNFA)
</ins><span class="cx"> {
</span><span class="cx"> // Make an NFA with about 1400 nodes.
</span><del>- // FIXME: This does not make multiple DFAs anymore. Add a test that does.
</del><span class="cx"> StringBuilder ruleList;
</span><span class="cx"> ruleList.append('[');
</span><span class="cx"> for (char c1 = 'A'; c1 <= 'Z'; ++c1) {
</span><span class="lines">@@ -543,6 +542,37 @@
</span><span class="cx"> testRequest(backend, mainDocumentRequest("http://webkit.org/"), { });
</span><span class="cx"> }
</span><span class="cx">
</span><ins>+TEST_F(ContentExtensionTest, DeepNFA)
+{
+ const unsigned size = 100000;
+
+ ContentExtensions::CombinedURLFilters combinedURLFilters;
+ ContentExtensions::URLFilterParser parser(combinedURLFilters);
+
+ // FIXME: NFAToDFA::convert takes way too long on these deep NFAs. We should optimize for that case.
+
+ StringBuilder lotsOfAs;
+ for (unsigned i = 0; i < size; ++i)
+ lotsOfAs.append('A');
+ EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern(lotsOfAs.toString().utf8().data(), false, 0));
+
+ // FIXME: Yarr ought to be able to handle 2MB regular expressions.
+ StringBuilder tooManyAs;
+ for (unsigned i = 0; i < size * 20; ++i)
+ tooManyAs.append('A');
+ EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::YarrError, parser.addPattern(tooManyAs.toString().utf8().data(), false, 0));
+
+ StringBuilder nestedGroups;
+ for (unsigned i = 0; i < size; ++i)
+ nestedGroups.append('(');
+ for (unsigned i = 0; i < size; ++i)
+ nestedGroups.append("B)");
+ // FIXME: Add nestedGroups. Right now it also takes too long. It should be optimized.
+
+ // This should not crash and not timeout.
+ EXPECT_EQ(1ul, createNFAs(combinedURLFilters).size());
+}
+
</ins><span class="cx"> void checkCompilerError(const char* json, ContentExtensions::ContentExtensionError expectedError)
</span><span class="cx"> {
</span><span class="cx"> WebCore::ContentExtensions::CompiledContentExtensionData extensionData;
</span><span class="lines">@@ -626,9 +656,9 @@
</span><span class="cx"> TEST_F(ContentExtensionTest, StrictPrefixSeparatedMachines2)
</span><span class="cx"> {
</span><span class="cx"> auto backend = makeBackend("[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^foo\"}},"
</span><del>- "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^.*[a-c]+bar\"}},"
- "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^webkit:\"}},"
- "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"[a-c]+b+oom\"}}]");
</del><ins>+ "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^.*[a-c]+bar\"}},"
+ "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^webkit:\"}},"
+ "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"[a-c]+b+oom\"}}]");
</ins><span class="cx">
</span><span class="cx"> testRequest(backend, mainDocumentRequest("http://webkit.org/"), { });
</span><span class="cx"> testRequest(backend, mainDocumentRequest("foo://webkit.org/"), { ContentExtensions::ActionType::BlockLoad });
</span><span class="lines">@@ -656,6 +686,52 @@
</span><span class="cx"> EXPECT_EQ(3ul, createNFAs(combinedURLFilters).size());
</span><span class="cx"> }
</span><span class="cx">
</span><ins>+TEST_F(ContentExtensionTest, StrictPrefixSeparatedMachines3)
+{
+ auto backend = makeBackend("[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"A*D\"}},"
+ "{\"action\":{\"type\":\"ignore-previous-rules\"},\"trigger\":{\"url-filter\":\"A*BA+\"}},"
+ "{\"action\":{\"type\":\"block-cookies\"},\"trigger\":{\"url-filter\":\"A*BC\"}}]");
+
+ testRequest(backend, mainDocumentRequest("http://webkit.org/D"), { ContentExtensions::ActionType::BlockLoad });
+ testRequest(backend, mainDocumentRequest("http://webkit.org/AAD"), { ContentExtensions::ActionType::BlockLoad });
+ testRequest(backend, mainDocumentRequest("http://webkit.org/AB"), { });
+ testRequest(backend, mainDocumentRequest("http://webkit.org/ABA"), { }, true);
+ testRequest(backend, mainDocumentRequest("http://webkit.org/ABAD"), { }, true);
+ testRequest(backend, mainDocumentRequest("http://webkit.org/BC"), { ContentExtensions::ActionType::BlockCookies });
+ testRequest(backend, mainDocumentRequest("http://webkit.org/ABC"), { ContentExtensions::ActionType::BlockCookies });
+ testRequest(backend, mainDocumentRequest("http://webkit.org/ABABC"), { ContentExtensions::ActionType::BlockCookies }, true);
+ testRequest(backend, mainDocumentRequest("http://webkit.org/ABABCAD"), { ContentExtensions::ActionType::BlockCookies }, true);
+ testRequest(backend, mainDocumentRequest("http://webkit.org/ABCAD"), { ContentExtensions::ActionType::BlockCookies, ContentExtensions::ActionType::BlockLoad });
+}
+
+TEST_F(ContentExtensionTest, StrictPrefixSeparatedMachines3Partitioning)
+{
+ ContentExtensions::CombinedURLFilters combinedURLFilters;
+ ContentExtensions::URLFilterParser parser(combinedURLFilters);
+
+ EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern("A*D", false, 0));
+ EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern("A*BA+", false, 1));
+ EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern("A*BC", false, 2));
+
+ // "A*D" and "A*BC" can be grouped, "A*BA+" should not.
+ EXPECT_EQ(2ul, createNFAs(combinedURLFilters).size());
+}
+
+TEST_F(ContentExtensionTest, QuantifierInGroup)
+{
+ ContentExtensions::CombinedURLFilters combinedURLFilters;
+ ContentExtensions::URLFilterParser parser(combinedURLFilters);
+
+ EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern("(((A+)B)C)", false, 0));
+ EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern("(((A)B+)C)", false, 1));
+ EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern("(((A)B+)C)D", false, 2));
+ EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern("(((A)B)C+)", false, 3));
+ EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern("(((A)B)C)", false, 4));
+
+ // (((A)B+)C) and (((A)B+)C)D should be in the same NFA.
+ EXPECT_EQ(4ul, createNFAs(combinedURLFilters).size());
+}
+
</ins><span class="cx"> static void testPatternStatus(String pattern, ContentExtensions::URLFilterParser::ParseStatus status)
</span><span class="cx"> {
</span><span class="cx"> ContentExtensions::CombinedURLFilters combinedURLFilters;
</span></span></pre>
</div>
</div>
</body>
</html>