<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[205990] trunk/Source/WebCore</title>
</head>
<body>
<style type="text/css"><!--
/* Stylesheet for the commit notification. #msg wraps the revision metadata
   and log message; #patch wraps the colorized diff. Rules for #logmsg,
   #header and #footer target elements that do not appear in this document's
   visible markup; they are kept unchanged. */

/* Revision metadata list and its links */
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
#msg dl a { font-weight: bold}
#msg dl a:link { color:#fc3; }
#msg dl a:active { color:#ff0; }
#msg dl a:visited { color:#cc6; }

/* Section headings and the plain-text log message */
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }

/* Rich-text log message container */
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
/* Negative text-indent plus equal left padding gives a hanging indent */
#logmsg ul { text-indent: -1em; padding-left: 1em; }
#logmsg ol { text-indent: -1.5em; padding-left: 1.5em; }
#logmsg > ul, #logmsg > ol { margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
/* \00bb is the right-pointing double angle quotation mark */
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }

/* Optional header and footer banners */
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }

/* Colorized diff: one bordered box per changed file */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff {padding:10px 0;}
/* Spans, ins and del are block-level so each background spans the full row */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* .info is scoped to #patch like every other diff rule so it cannot restyle
   unrelated .info elements when this message is embedded in another page */
#patch .lines, #patch .info {color:#888;background:#fff;}
--></style>
<div id="msg">
<dl class="meta">
<dt>Revision</dt> <dd><a href="http://trac.webkit.org/projects/webkit/changeset/205990">205990</a></dd>
<dt>Author</dt> <dd>achristensen@apple.com</dd>
<dt>Date</dt> <dd>2016-09-15 11:48:45 -0700 (Thu, 15 Sep 2016)</dd>
</dl>
<h3>Log Message</h3>
<pre>Use character class table in URLParser
https://bugs.webkit.org/show_bug.cgi?id=161997
Reviewed by Chris Dumez.
No change in behavior except a performance improvement.
Before this change, URLParser took 1.514x as long to run my URL Parsing benchmark as URL::parse
with a standard deviation of the ratio of the runtimes of 0.063 after 8 runs with each parser.
After this change, URLParser took 1.328x as long with a standard deviation of 0.037.
This isn't the cleanest data, but it's enough to convince me that this is a significant improvement.
* platform/URLParser.cpp:
(WebCore::isC0Control):
(WebCore::isC0ControlOrSpace):
(WebCore::isTabOrNewline):
(WebCore::isInSimpleEncodeSet):
(WebCore::isInDefaultEncodeSet):
(WebCore::isInUserInfoEncodeSet):
(WebCore::isInvalidDomainCharacter):
(WebCore::isSlashQuestionOrHash):
(WebCore::shouldPercentEncodeQueryByte):
(WebCore::shouldCopyFileURL):
(WebCore::isSingleDotPathSegment):
(WebCore::URLParser::parse):</pre>
<h3>Modified Paths</h3>
<ul>
<li><a href="#trunkSourceWebCoreChangeLog">trunk/Source/WebCore/ChangeLog</a></li>
<li><a href="#trunkSourceWebCoreplatformURLParsercpp">trunk/Source/WebCore/platform/URLParser.cpp</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkSourceWebCoreChangeLog"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/ChangeLog (205989 => 205990)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/ChangeLog        2016-09-15 18:35:51 UTC (rev 205989)
+++ trunk/Source/WebCore/ChangeLog        2016-09-15 18:48:45 UTC (rev 205990)
</span><span class="lines">@@ -1,3 +1,31 @@
</span><ins>+2016-09-15 Alex Christensen &lt;achristensen@webkit.org&gt;
+
+ Use character class table in URLParser
+ https://bugs.webkit.org/show_bug.cgi?id=161997
+
+ Reviewed by Chris Dumez.
+
+ No change in behavior except a performance improvement.
+
+ Before this change, URLParser took 1.514x as long to run my URL Parsing benchmark as URL::parse
+ with a standard deviation of the ratio of the runtimes of 0.063 after 8 runs with each parser.
+ After this change, URLParser took 1.328x as long with a standard deviation of 0.037.
+ This isn't the cleanest data, but it's enough to convince me that this is a significant improvement.
+
+ * platform/URLParser.cpp:
+ (WebCore::isC0Control):
+ (WebCore::isC0ControlOrSpace):
+ (WebCore::isTabOrNewline):
+ (WebCore::isInSimpleEncodeSet):
+ (WebCore::isInDefaultEncodeSet):
+ (WebCore::isInUserInfoEncodeSet):
+ (WebCore::isInvalidDomainCharacter):
+ (WebCore::isSlashQuestionOrHash):
+ (WebCore::shouldPercentEncodeQueryByte):
+ (WebCore::shouldCopyFileURL):
+ (WebCore::isSingleDotPathSegment):
+ (WebCore::URLParser::parse):
+
</ins><span class="cx"> 2016-09-15 Keith Miller &lt;keith_miller@apple.com&gt;
</span><span class="cx">
</span><span class="cx"> Pragma out undefined-var-template warnings in JSC for JSObjects that are templatized
</span></span></pre></div>
<a id="trunkSourceWebCoreplatformURLParsercpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/WebCore/platform/URLParser.cpp (205989 => 205990)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/WebCore/platform/URLParser.cpp        2016-09-15 18:35:51 UTC (rev 205989)
+++ trunk/Source/WebCore/platform/URLParser.cpp        2016-09-15 18:48:45 UTC (rev 205990)
</span><span class="lines">@@ -116,14 +116,283 @@
</span><span class="cx"> return *this;
</span><span class="cx"> }
</span><span class="cx">
</span><del>-template&lt;typename CharacterType&gt; static bool isC0Control(CharacterType character) { return character &lt;= 0x0001F; }
-template&lt;typename CharacterType&gt; static bool isC0ControlOrSpace(CharacterType character) { return isC0Control(character) || character == 0x0020; }
-template&lt;typename CharacterType&gt; static bool isTabOrNewline(CharacterType character) { return character == 0x0009 || character == 0x000A || character == 0x000D; }
-template&lt;typename CharacterType&gt; static bool isInSimpleEncodeSet(CharacterType character) { return isC0Control(character) || character &gt; 0x007E; }
-template&lt;typename CharacterType&gt; static bool isInDefaultEncodeSet(CharacterType character) { return isInSimpleEncodeSet(character) || character == 0x0020 || character == '"' || character == '#' || character == '&lt;' || character == '&gt;' || character == '?' || character == '`' || character == '{' || character == '}'; }
-template&lt;typename CharacterType&gt; static bool isInUserInfoEncodeSet(CharacterType character) { return isInDefaultEncodeSet(character) || character == '/' || character == ':' || character == ';' || character == '=' || character == '@' || character == '[' || character == '\\' || character == ']' || character == '^' || character == '|'; }
-template&lt;typename CharacterType&gt; static bool isInvalidDomainCharacter(CharacterType character) { return character == 0x0000 || character == 0x0009 || character == 0x000A || character == 0x000D || character == 0x0020 || character == '#' || character == '%' || character == '/' || character == ':' || character == '?' || character == '@' || character == '[' || character == '\\' || character == ']'; }
</del><ins>+enum URLCharacterClass {
+ UserInfo = 0x1,
+ Default = 0x2,
+ InvalidDomain = 0x4,
+ QueryPercent = 0x8,
+ SlashQuestionOrHash = 0x10,
+};
+
+static const uint8_t characterClassTable[256] = {
+ UserInfo | Default | InvalidDomain | QueryPercent, // 0x0
+ UserInfo | Default | QueryPercent, // 0x1
+ UserInfo | Default | QueryPercent, // 0x2
+ UserInfo | Default | QueryPercent, // 0x3
+ UserInfo | Default | QueryPercent, // 0x4
+ UserInfo | Default | QueryPercent, // 0x5
+ UserInfo | Default | QueryPercent, // 0x6
+ UserInfo | Default | QueryPercent, // 0x7
+ UserInfo | Default | QueryPercent, // 0x8
+ UserInfo | Default | InvalidDomain | QueryPercent, // 0x9
+ UserInfo | Default | InvalidDomain | QueryPercent, // 0xA
+ UserInfo | Default | QueryPercent, // 0xB
+ UserInfo | Default | QueryPercent, // 0xC
+ UserInfo | Default | InvalidDomain | QueryPercent, // 0xD
+ UserInfo | Default | QueryPercent, // 0xE
+ UserInfo | Default | QueryPercent, // 0xF
+ UserInfo | Default | QueryPercent, // 0x10
+ UserInfo | Default | QueryPercent, // 0x11
+ UserInfo | Default | QueryPercent, // 0x12
+ UserInfo | Default | QueryPercent, // 0x13
+ UserInfo | Default | QueryPercent, // 0x14
+ UserInfo | Default | QueryPercent, // 0x15
+ UserInfo | Default | QueryPercent, // 0x16
+ UserInfo | Default | QueryPercent, // 0x17
+ UserInfo | Default | QueryPercent, // 0x18
+ UserInfo | Default | QueryPercent, // 0x19
+ UserInfo | Default | QueryPercent, // 0x1A
+ UserInfo | Default | QueryPercent, // 0x1B
+ UserInfo | Default | QueryPercent, // 0x1C
+ UserInfo | Default | QueryPercent, // 0x1D
+ UserInfo | Default | QueryPercent, // 0x1E
+ UserInfo | Default | QueryPercent, // 0x1F
+ UserInfo | Default | InvalidDomain | QueryPercent, // ' '
+ 0, // '!'
+ UserInfo | Default | QueryPercent, // '"'
+ UserInfo | Default | InvalidDomain | QueryPercent | SlashQuestionOrHash, // '#'
+ 0, // '$'
+ InvalidDomain, // '%'
+ 0, // '&'
+ 0, // '''
+ 0, // '('
+ 0, // ')'
+ 0, // '*'
+ 0, // '+'
+ 0, // ','
+ 0, // '-'
+ 0, // '.'
+ UserInfo | InvalidDomain | SlashQuestionOrHash, // '/'
+ 0, // '0'
+ 0, // '1'
+ 0, // '2'
+ 0, // '3'
+ 0, // '4'
+ 0, // '5'
+ 0, // '6'
+ 0, // '7'
+ 0, // '8'
+ 0, // '9'
+ UserInfo | InvalidDomain, // ':'
+ UserInfo, // ';'
+ UserInfo | Default | QueryPercent, // '&lt;'
+ UserInfo, // '='
+ UserInfo | Default | QueryPercent, // '&gt;'
+ UserInfo | Default | InvalidDomain | SlashQuestionOrHash, // '?'
+ UserInfo | InvalidDomain, // '@'
+ 0, // 'A'
+ 0, // 'B'
+ 0, // 'C'
+ 0, // 'D'
+ 0, // 'E'
+ 0, // 'F'
+ 0, // 'G'
+ 0, // 'H'
+ 0, // 'I'
+ 0, // 'J'
+ 0, // 'K'
+ 0, // 'L'
+ 0, // 'M'
+ 0, // 'N'
+ 0, // 'O'
+ 0, // 'P'
+ 0, // 'Q'
+ 0, // 'R'
+ 0, // 'S'
+ 0, // 'T'
+ 0, // 'U'
+ 0, // 'V'
+ 0, // 'W'
+ 0, // 'X'
+ 0, // 'Y'
+ 0, // 'Z'
+ UserInfo | InvalidDomain, // '['
+ UserInfo | InvalidDomain | SlashQuestionOrHash, // '\\'
+ UserInfo | InvalidDomain, // ']'
+ UserInfo, // '^'
+ 0, // '_'
+ UserInfo | Default, // '`'
+ 0, // 'a'
+ 0, // 'b'
+ 0, // 'c'
+ 0, // 'd'
+ 0, // 'e'
+ 0, // 'f'
+ 0, // 'g'
+ 0, // 'h'
+ 0, // 'i'
+ 0, // 'j'
+ 0, // 'k'
+ 0, // 'l'
+ 0, // 'm'
+ 0, // 'n'
+ 0, // 'o'
+ 0, // 'p'
+ 0, // 'q'
+ 0, // 'r'
+ 0, // 's'
+ 0, // 't'
+ 0, // 'u'
+ 0, // 'v'
+ 0, // 'w'
+ 0, // 'x'
+ 0, // 'y'
+ 0, // 'z'
+ UserInfo | Default, // '{'
+ UserInfo, // '|'
+ UserInfo | Default, // '}'
+ 0, // '~'
+ QueryPercent, // 0x7F
+ QueryPercent, // 0x80
+ QueryPercent, // 0x81
+ QueryPercent, // 0x82
+ QueryPercent, // 0x83
+ QueryPercent, // 0x84
+ QueryPercent, // 0x85
+ QueryPercent, // 0x86
+ QueryPercent, // 0x87
+ QueryPercent, // 0x88
+ QueryPercent, // 0x89
+ QueryPercent, // 0x8A
+ QueryPercent, // 0x8B
+ QueryPercent, // 0x8C
+ QueryPercent, // 0x8D
+ QueryPercent, // 0x8E
+ QueryPercent, // 0x8F
+ QueryPercent, // 0x90
+ QueryPercent, // 0x91
+ QueryPercent, // 0x92
+ QueryPercent, // 0x93
+ QueryPercent, // 0x94
+ QueryPercent, // 0x95
+ QueryPercent, // 0x96
+ QueryPercent, // 0x97
+ QueryPercent, // 0x98
+ QueryPercent, // 0x99
+ QueryPercent, // 0x9A
+ QueryPercent, // 0x9B
+ QueryPercent, // 0x9C
+ QueryPercent, // 0x9D
+ QueryPercent, // 0x9E
+ QueryPercent, // 0x9F
+ QueryPercent, // 0xA0
+ QueryPercent, // 0xA1
+ QueryPercent, // 0xA2
+ QueryPercent, // 0xA3
+ QueryPercent, // 0xA4
+ QueryPercent, // 0xA5
+ QueryPercent, // 0xA6
+ QueryPercent, // 0xA7
+ QueryPercent, // 0xA8
+ QueryPercent, // 0xA9
+ QueryPercent, // 0xAA
+ QueryPercent, // 0xAB
+ QueryPercent, // 0xAC
+ QueryPercent, // 0xAD
+ QueryPercent, // 0xAE
+ QueryPercent, // 0xAF
+ QueryPercent, // 0xB0
+ QueryPercent, // 0xB1
+ QueryPercent, // 0xB2
+ QueryPercent, // 0xB3
+ QueryPercent, // 0xB4
+ QueryPercent, // 0xB5
+ QueryPercent, // 0xB6
+ QueryPercent, // 0xB7
+ QueryPercent, // 0xB8
+ QueryPercent, // 0xB9
+ QueryPercent, // 0xBA
+ QueryPercent, // 0xBB
+ QueryPercent, // 0xBC
+ QueryPercent, // 0xBD
+ QueryPercent, // 0xBE
+ QueryPercent, // 0xBF
+ QueryPercent, // 0xC0
+ QueryPercent, // 0xC1
+ QueryPercent, // 0xC2
+ QueryPercent, // 0xC3
+ QueryPercent, // 0xC4
+ QueryPercent, // 0xC5
+ QueryPercent, // 0xC6
+ QueryPercent, // 0xC7
+ QueryPercent, // 0xC8
+ QueryPercent, // 0xC9
+ QueryPercent, // 0xCA
+ QueryPercent, // 0xCB
+ QueryPercent, // 0xCC
+ QueryPercent, // 0xCD
+ QueryPercent, // 0xCE
+ QueryPercent, // 0xCF
+ QueryPercent, // 0xD0
+ QueryPercent, // 0xD1
+ QueryPercent, // 0xD2
+ QueryPercent, // 0xD3
+ QueryPercent, // 0xD4
+ QueryPercent, // 0xD5
+ QueryPercent, // 0xD6
+ QueryPercent, // 0xD7
+ QueryPercent, // 0xD8
+ QueryPercent, // 0xD9
+ QueryPercent, // 0xDA
+ QueryPercent, // 0xDB
+ QueryPercent, // 0xDC
+ QueryPercent, // 0xDD
+ QueryPercent, // 0xDE
+ QueryPercent, // 0xDF
+ QueryPercent, // 0xE0
+ QueryPercent, // 0xE1
+ QueryPercent, // 0xE2
+ QueryPercent, // 0xE3
+ QueryPercent, // 0xE4
+ QueryPercent, // 0xE5
+ QueryPercent, // 0xE6
+ QueryPercent, // 0xE7
+ QueryPercent, // 0xE8
+ QueryPercent, // 0xE9
+ QueryPercent, // 0xEA
+ QueryPercent, // 0xEB
+ QueryPercent, // 0xEC
+ QueryPercent, // 0xED
+ QueryPercent, // 0xEE
+ QueryPercent, // 0xEF
+ QueryPercent, // 0xF0
+ QueryPercent, // 0xF1
+ QueryPercent, // 0xF2
+ QueryPercent, // 0xF3
+ QueryPercent, // 0xF4
+ QueryPercent, // 0xF5
+ QueryPercent, // 0xF6
+ QueryPercent, // 0xF7
+ QueryPercent, // 0xF8
+ QueryPercent, // 0xF9
+ QueryPercent, // 0xFA
+ QueryPercent, // 0xFB
+ QueryPercent, // 0xFC
+ QueryPercent, // 0xFD
+ QueryPercent, // 0xFE
+ QueryPercent, // 0xFF
+};
+
+template&lt;typename CharacterType&gt; static bool isC0Control(CharacterType character) { return character &lt;= 0x1F; }
+template&lt;typename CharacterType&gt; static bool isC0ControlOrSpace(CharacterType character) { return character &lt;= 0x20; }
+template&lt;typename CharacterType&gt; static bool isTabOrNewline(CharacterType character) { return character &lt;= 0xD &amp;&amp; character &gt;= 0x9 &amp;&amp; character != 0xB &amp;&amp; character != 0xC; }
+template&lt;typename CharacterType&gt; static bool isInSimpleEncodeSet(CharacterType character) { return character &gt; 0x7E || isC0Control(character); }
+template&lt;typename CharacterType&gt; static bool isInDefaultEncodeSet(CharacterType character) { return character &gt; 0x7E || characterClassTable[character] &amp; Default; }
+template&lt;typename CharacterType&gt; static bool isInUserInfoEncodeSet(CharacterType character) { return character &gt; 0x7E || characterClassTable[character] &amp; UserInfo; }
+template&lt;typename CharacterType&gt; static bool isInvalidDomainCharacter(CharacterType character) { return character &lt;= ']' &amp;&amp; characterClassTable[character] &amp; InvalidDomain; }
</ins><span class="cx"> template&lt;typename CharacterType&gt; static bool isPercentOrNonASCII(CharacterType character) { return !isASCII(character) || character == '%'; }
</span><ins>+template&lt;typename CharacterType&gt; static bool isSlashQuestionOrHash(CharacterType character) { return character &lt;= '\\' &amp;&amp; characterClassTable[character] &amp; SlashQuestionOrHash; }
+static bool shouldPercentEncodeQueryByte(uint8_t byte) { return characterClassTable[byte] &amp; QueryPercent; }
</ins><span class="cx">
</span><span class="cx"> template&lt;typename CharacterType&gt;
</span><span class="cx"> static bool isWindowsDriveLetter(CodePointIterator&lt;CharacterType&gt; iterator)
</span><span class="lines">@@ -156,7 +425,7 @@
</span><span class="cx"> ++iterator;
</span><span class="cx"> if (iterator.atEnd())
</span><span class="cx"> return true;
</span><del>- return *iterator != '/' &amp;&amp; *iterator != '\\' &amp;&amp; *iterator != '?' &amp;&amp; *iterator != '#';
</del><ins>+ return !isSlashQuestionOrHash(*iterator);
</ins><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> static void percentEncode(uint8_t byte, StringBuilder&amp; builder)
</span><span class="lines">@@ -180,21 +449,6 @@
</span><span class="cx"> builder.append(codePoint);
</span><span class="cx"> }
</span><span class="cx">
</span><del>-static bool shouldPercentEncodeQueryByte(uint8_t byte)
-{
- if (byte &lt; 0x21)
- return true;
- if (byte &gt; 0x7E)
- return true;
- if (byte == 0x22)
- return true;
- if (byte == 0x23)
- return true;
- if (byte == 0x3C)
- return true;
- return byte == 0x3E;
-}
-
</del><span class="cx"> static void utf8PercentEncodeQuery(UChar32 codePoint, StringBuilder&amp; builder)
</span><span class="cx"> {
</span><span class="cx"> uint8_t buffer[U8_MAX_LENGTH];
</span><span class="lines">@@ -458,7 +712,7 @@
</span><span class="cx"> return false;
</span><span class="cx"> if (*c == '.') {
</span><span class="cx"> ++c;
</span><del>- return c.atEnd() || *c == '/' || *c == '\\' || *c == '?' || *c == '#';
</del><ins>+ return c.atEnd() || isSlashQuestionOrHash(*c);
</ins><span class="cx"> }
</span><span class="cx"> if (*c != '%')
</span><span class="cx"> return false;
</span><span class="lines">@@ -470,7 +724,7 @@
</span><span class="cx"> return false;
</span><span class="cx"> if (toASCIILower(*c) == dotASCIICode[1]) {
</span><span class="cx"> ++c;
</span><del>- return c.atEnd() || *c == '/' || *c == '\\' || *c == '?' || *c == '#';
</del><ins>+ return c.atEnd() || isSlashQuestionOrHash(*c);
</ins><span class="cx"> }
</span><span class="cx"> return false;
</span><span class="cx"> }
</span><span class="lines">@@ -956,7 +1210,7 @@
</span><span class="cx"> break;
</span><span class="cx"> case State::FileHost:
</span><span class="cx"> LOG_STATE("FileHost");
</span><del>- if (*c == '/' || *c == '\\' || *c == '?' || *c == '#') {
</del><ins>+ if (isSlashQuestionOrHash(*c)) {
</ins><span class="cx"> if (isWindowsDriveLetter(m_buffer, m_url.m_portEnd + 1)) {
</span><span class="cx"> state = State::Path;
</span><span class="cx"> break;
</span></span></pre>
</div>
</div>
</body>
</html>