[webkit-changes] cvs commit: WebCore/khtml/xml xml_tokenizer.cpp
Eric
eseidel at opensource.apple.com
Thu Dec 29 15:49:04 PST 2005
eseidel 05/12/29 15:49:04
Modified: . ChangeLog
khtml/html htmltokenizer.cpp kentities.gperf
khtml/xml xml_tokenizer.cpp
Added: khtml/html kentities.h
Log:
Bug #: 4301
Submitted by: mark rowe
Reviewed by: eseidel, ggaren, darin
- http://bugzilla.opendarwin.org/show_bug.cgi?id=4301
Support HTML entities on pages parsed as XHTML
Added layout tests:
* fast/parser/entities-in-xhtml.xhtml
* khtml/xml/xml_tokenizer.cpp:
(khtml::XMLTokenizer::setIsXHTMLDocument): Track whether the XML document is XHTML.
(khtml::XMLTokenizer::isXHTMLDocument): Ditto.
(khtml::externalSubsetHandler): Ditto.
(khtml::XMLTokenizer::finish): Ditto.
(khtml::getXHTMLEntity): Look up the HTML entity.
(khtml::getEntityHandler): Look up the HTML entity if this is an XHTML document.
* khtml/html/kentities.h: Added.
* khtml/html/kentities.gperf: changed entity to Entity
Revision Changes Path
1.55 +21 -1 WebCore/ChangeLog
Index: ChangeLog
===================================================================
RCS file: /cvs/root/WebCore/ChangeLog,v
retrieving revision 1.54
retrieving revision 1.55
diff -u -r1.54 -r1.55
--- ChangeLog 29 Dec 2005 11:27:06 -0000 1.54
+++ ChangeLog 29 Dec 2005 23:49:03 -0000 1.55
@@ -1,3 +1,23 @@
+2005-12-29 Mark Rowe <opendarwin.org at bdash.net.nz>
+
+ Reviewed by eseidel, ggaren, darin.
+
+ - http://bugzilla.opendarwin.org/show_bug.cgi?id=4301
+ Support HTML entities on pages parsed as XHTML
+
+ Added layout tests:
+ * fast/parser/entities-in-xhtml.xhtml
+
+ * khtml/xml/xml_tokenizer.cpp:
+ (khtml::XMLTokenizer::setIsXHTMLDocument): Track whether the XML document is XHTML.
+ (khtml::XMLTokenizer::isXHTMLDocument): Ditto.
+ (khtml::externalSubsetHandler): Ditto.
+ (khtml::XMLTokenizer::finish): Ditto.
+ (khtml::getXHTMLEntity): Look up the HTML entity.
+ (khtml::getEntityHandler): Look up the HTML entity if this is an XHTML document.
+ * khtml/html/kentities.h: Added.
+ * khtml/html/kentities.gperf: changed entity to Entity
+
2005-12-29 Mitz Pettel <opendarwin.org at mitzpettel.com>
Reviewed by darin
@@ -72,7 +92,7 @@
(KJS::HTMLAllCollection::toBoolean):
Return false.
-
+
2005-12-28 Mitz Pettel <opendarwin.org at mitzpettel.com>
Reviewed by Eric, landed by ap.
1.132 +1 -8 WebCore/khtml/html/htmltokenizer.cpp
Index: htmltokenizer.cpp
===================================================================
RCS file: /cvs/root/WebCore/khtml/html/htmltokenizer.cpp,v
retrieving revision 1.131
retrieving revision 1.132
diff -u -r1.131 -r1.132
--- htmltokenizer.cpp 28 Dec 2005 18:46:22 -0000 1.131
+++ htmltokenizer.cpp 29 Dec 2005 23:49:03 -0000 1.132
@@ -791,7 +791,7 @@
state.setEntityState(SearchSemicolon);
if (state.entityState() == SearchSemicolon) {
if(cBufferPos > 1) {
- const entity *e = findEntity(cBuffer, cBufferPos);
+ const Entity *e = findEntity(cBuffer, cBufferPos);
if(e)
EntityUnicodeValue = e->code;
@@ -804,9 +804,6 @@
break;
}
case SearchSemicolon:
-
- //kdDebug( 6036 ) << "ENTITY " << EntityUnicodeValue << ", " << res << endl;
-
// Don't allow surrogate code points, or values that are more than 21 bits.
if ((EntityUnicodeValue > 0 && EntityUnicodeValue < 0xD800)
|| (EntityUnicodeValue >= 0xE000 && EntityUnicodeValue <= 0x1FFFFF)) {
@@ -825,11 +822,7 @@
src.push(c1);
src.push(c2);
}
-
} else {
-#ifdef TOKEN_DEBUG
- kdDebug( 6036 ) << "unknown entity!" << endl;
-#endif
checkBuffer(10);
// ignore the sequence, add it to the buffer as plaintext
*dest++ = '&';
1.6 +2 -2 WebCore/khtml/html/kentities.gperf
Index: kentities.gperf
===================================================================
RCS file: /cvs/root/WebCore/khtml/html/kentities.gperf,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- kentities.gperf 22 Dec 2005 16:50:07 -0000 1.5
+++ kentities.gperf 29 Dec 2005 23:49:03 -0000 1.6
@@ -25,9 +25,9 @@
"gperf -a -L "ANSI-C" -C -G -c -o -t -k '*' -NfindEntity -D -s 2 khtmlentities.gperf > entities.c"
from kentities.gperf
-*/
+*/
%}
-struct entity {
+struct Entity {
const char *name;
int code;
};
1.1 WebCore/khtml/html/kentities.h
Index: kentities.h
===================================================================
/*
* Copyright (C) 2004 Apple Computer, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// This must be kept in sync with khtml/html/kentities.gperf
struct Entity {
const char *name;
int code;
};
const struct Entity *findEntity(const char *str, unsigned int len);
1.61 +46 -4 WebCore/khtml/xml/xml_tokenizer.cpp
Index: xml_tokenizer.cpp
===================================================================
RCS file: /cvs/root/WebCore/khtml/xml/xml_tokenizer.cpp,v
retrieving revision 1.60
retrieving revision 1.61
diff -u -r1.60 -r1.61
--- xml_tokenizer.cpp 23 Dec 2005 18:44:29 -0000 1.60
+++ xml_tokenizer.cpp 29 Dec 2005 23:49:04 -0000 1.61
@@ -48,6 +48,9 @@
#include <qptrstack.h>
+#include "khtml/html/kentities.h" // for xhtml entity name lookup
+#include <kxmlcore/Assertions.h>
+
using namespace DOM;
using namespace HTMLNames;
@@ -83,6 +86,9 @@
virtual bool isWaitingForScripts() const;
virtual void stopParsing();
+ void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; }
+ bool isXHTMLDocument() const { return m_isXHTMLDocument; }
+
#ifdef KHTML_XSLT
void setTransformSource(DocumentImpl* doc);
#endif
@@ -125,6 +131,7 @@
bool m_sawError;
bool m_sawXSLTransform;
+ bool m_isXHTMLDocument;
int m_errorCount;
int m_lastErrorLine;
@@ -607,18 +614,46 @@
va_end(args);
}
+// Using a global variable entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
+// a hack to avoid malloc/free. Using a global variable like this could cause trouble
+// if libxml implementation details were to change
+static xmlChar sharedXHTMLEntityResult[5] = {0,0,0,0,0};
+static xmlEntity sharedXHTMLEntity = {
+ 0, XML_ENTITY_DECL, 0, 0, 0, 0, 0, 0, 0,
+ sharedXHTMLEntityResult, sharedXHTMLEntityResult, 0,
+ XML_INTERNAL_PREDEFINED_ENTITY, 0, 0, 0, 0, 0
+};
+
+static xmlEntityPtr getXHTMLEntity(const xmlChar *name)
+{
+ const char *namePosition = (const char *)name;
+ const Entity *e = findEntity(namePosition, strlen(namePosition));
+ if (!e)
+ return 0;
+
+ QCString value = QString(QChar(e->code)).utf8();
+ assert(value.length() < 5);
+ sharedXHTMLEntity.length = value.length();
+ sharedXHTMLEntity.name = name;
+ memcpy(sharedXHTMLEntityResult, value.data(), sharedXHTMLEntity.length);
+
+ return &sharedXHTMLEntity;
+}
+
static xmlEntityPtr getEntityHandler(void *closure, const xmlChar *name)
{
xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
xmlEntityPtr ent = xmlGetPredefinedEntity(name);
- if(ent)
+ if (ent)
return ent;
- // Work around a libxml SAX2 bug that causes charactersHandler to be called twice.
- bool inAttr = ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE;
ent = xmlGetDocEntity(ctxt->myDoc, name);
+ if (!ent && getTokenizer(closure)->isXHTMLDocument())
+ ent = getXHTMLEntity(name);
+
+ // Work around a libxml SAX2 bug that causes charactersHandler to be called twice.
if (ent)
- ctxt->replaceEntities = inAttr || (ent->etype != XML_INTERNAL_GENERAL_ENTITY);
+ ctxt->replaceEntities = (ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) || (ent->etype != XML_INTERNAL_GENERAL_ENTITY);
return ent;
}
@@ -629,6 +664,12 @@
xmlSAX2InternalSubset(closure, name, externalID, systemID);
}
+static void externalSubsetHandler(void *closure, const xmlChar *name, const xmlChar *externalId, const xmlChar *systemId)
+{
+ if (toQString(name).contains("html"))
+ getTokenizer(closure)->setIsXHTMLDocument(true);
+}
+
void XMLTokenizer::finish()
{
if (m_xmlCode.isEmpty())
@@ -648,6 +689,7 @@
sax.getEntity = getEntityHandler;
sax.startDocument = xmlSAX2StartDocument;
sax.internalSubset = internalSubsetHandler;
+ sax.externalSubset = externalSubsetHandler;
sax.entityDecl = xmlSAX2EntityDecl;
sax.initialized = XML_SAX2_MAGIC;
More information about the webkit-changes
mailing list