[webkit-changes] cvs commit: WebCore/khtml/xml xml_tokenizer.cpp

Eric eseidel at opensource.apple.com
Thu Dec 29 15:49:04 PST 2005


eseidel     05/12/29 15:49:04

  Modified:    .        ChangeLog
               khtml/html htmltokenizer.cpp kentities.gperf
               khtml/xml xml_tokenizer.cpp
  Added:       khtml/html kentities.h
  Log:
  Bug #: 4301
  Submitted by: mark rowe
  Reviewed by: eseidel, ggaren, darin
          - http://bugzilla.opendarwin.org/show_bug.cgi?id=4301
          Support HTML entities on pages parsed as XHTML
  
          Added layout tests:
          * fast/parser/entities-in-xhtml.xhtml
  
          * khtml/xml/xml_tokenizer.cpp:
          (khtml::XMLTokenizer::setIsXHTMLDocument): Track whether the XML document is XHTML.
          (khtml::XMLTokenizer::isXHTMLDocument): Ditto.
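          (khtml::externalSubsetHandler): Mark the document as XHTML when the DOCTYPE name contains "html".
          (khtml::XMLTokenizer::finish): Register externalSubsetHandler with the SAX parser.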
          (khtml::getXHTMLEntity): Look up the HTML entity.
          (khtml::getEntityHandler): Look up the HTML entity if this is an XHTML document.
          * khtml/html/kentities.h: Added.
          * khtml/html/kentities.gperf: Renamed struct entity to Entity.
  
  Revision  Changes    Path
  1.55      +21 -1     WebCore/ChangeLog
  
  Index: ChangeLog
  ===================================================================
  RCS file: /cvs/root/WebCore/ChangeLog,v
  retrieving revision 1.54
  retrieving revision 1.55
  diff -u -r1.54 -r1.55
  --- ChangeLog	29 Dec 2005 11:27:06 -0000	1.54
  +++ ChangeLog	29 Dec 2005 23:49:03 -0000	1.55
  @@ -1,3 +1,23 @@
  +2005-12-29  Mark Rowe  <opendarwin.org at bdash.net.nz>
  +
  +        Reviewed by eseidel, ggaren, darin.
  +        
  +        - http://bugzilla.opendarwin.org/show_bug.cgi?id=4301
  +        Support HTML entities on pages parsed as XHTML
  +
  +        Added layout tests:
  +        * fast/parser/entities-in-xhtml.xhtml
  +
  +        * khtml/xml/xml_tokenizer.cpp:
  +        (khtml::XMLTokenizer::setIsXHTMLDocument): Track whether the XML document is XHTML.
  +        (khtml::XMLTokenizer::isXHTMLDocument): Ditto.
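  +        (khtml::externalSubsetHandler): Mark the document as XHTML when the DOCTYPE name contains "html".
  +        (khtml::XMLTokenizer::finish): Register externalSubsetHandler with the SAX parser.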
  +        (khtml::getXHTMLEntity): Look up the HTML entity.
  +        (khtml::getEntityHandler): Look up the HTML entity if this is an XHTML document.
  +        * khtml/html/kentities.h: Added.
  +        * khtml/html/kentities.gperf: Renamed struct entity to Entity.
  +
   2005-12-29  Mitz Pettel  <opendarwin.org at mitzpettel.com>
   
           Reviewed by darin
  @@ -72,7 +92,7 @@
           
           (KJS::HTMLAllCollection::toBoolean):
           Return false.
  -        
  +
   2005-12-28  Mitz Pettel  <opendarwin.org at mitzpettel.com>
   
           Reviewed by Eric, landed by ap.
  
  
  
  1.132     +1 -8      WebCore/khtml/html/htmltokenizer.cpp
  
  Index: htmltokenizer.cpp
  ===================================================================
  RCS file: /cvs/root/WebCore/khtml/html/htmltokenizer.cpp,v
  retrieving revision 1.131
  retrieving revision 1.132
  diff -u -r1.131 -r1.132
  --- htmltokenizer.cpp	28 Dec 2005 18:46:22 -0000	1.131
  +++ htmltokenizer.cpp	29 Dec 2005 23:49:03 -0000	1.132
  @@ -791,7 +791,7 @@
                   state.setEntityState(SearchSemicolon);
               if (state.entityState() == SearchSemicolon) {
                   if(cBufferPos > 1) {
  -                    const entity *e = findEntity(cBuffer, cBufferPos);
  +                    const Entity *e = findEntity(cBuffer, cBufferPos);
                       if(e)
                           EntityUnicodeValue = e->code;
   
  @@ -804,9 +804,6 @@
                   break;
           }
           case SearchSemicolon:
  -
  -            //kdDebug( 6036 ) << "ENTITY " << EntityUnicodeValue << ", " << res << endl;
  -
               // Don't allow surrogate code points, or values that are more than 21 bits.
               if ((EntityUnicodeValue > 0 && EntityUnicodeValue < 0xD800)
                       || (EntityUnicodeValue >= 0xE000 && EntityUnicodeValue <= 0x1FFFFF)) {
  @@ -825,11 +822,7 @@
                       src.push(c1);
                       src.push(c2);
                   }
  -
               } else {
  -#ifdef TOKEN_DEBUG
  -                kdDebug( 6036 ) << "unknown entity!" << endl;
  -#endif
                   checkBuffer(10);
                   // ignore the sequence, add it to the buffer as plaintext
                   *dest++ = '&';
  
  
  
  1.6       +2 -2      WebCore/khtml/html/kentities.gperf
  
  Index: kentities.gperf
  ===================================================================
  RCS file: /cvs/root/WebCore/khtml/html/kentities.gperf,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- kentities.gperf	22 Dec 2005 16:50:07 -0000	1.5
  +++ kentities.gperf	29 Dec 2005 23:49:03 -0000	1.6
  @@ -25,9 +25,9 @@
       "gperf -a -L "ANSI-C" -C -G -c -o -t -k '*' -NfindEntity -D -s 2 khtmlentities.gperf > entities.c"   
       from kentities.gperf 
   
  -*/  
  +*/
   %}
  -struct entity {
  +struct Entity {
       const char *name;
       int code;
   };
  
  
  
  1.1                  WebCore/khtml/html/kentities.h
  
  Index: kentities.h
  ===================================================================
  /*
   * Copyright (C) 2004 Apple Computer, Inc.  All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in the
   *    documentation and/or other materials provided with the distribution.
   *
   * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
   * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
   * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
   */
  
  // This must be kept in sync with khtml/html/kentities.gperf
  struct Entity {
      const char *name;
      int code;
  };
  
  const struct Entity *findEntity(const char *str, unsigned int len);
  
  
  
  1.61      +46 -4     WebCore/khtml/xml/xml_tokenizer.cpp
  
  Index: xml_tokenizer.cpp
  ===================================================================
  RCS file: /cvs/root/WebCore/khtml/xml/xml_tokenizer.cpp,v
  retrieving revision 1.60
  retrieving revision 1.61
  diff -u -r1.60 -r1.61
  --- xml_tokenizer.cpp	23 Dec 2005 18:44:29 -0000	1.60
  +++ xml_tokenizer.cpp	29 Dec 2005 23:49:04 -0000	1.61
  @@ -48,6 +48,9 @@
   
   #include <qptrstack.h>
   
  +#include "khtml/html/kentities.h"  // for xhtml entity name lookup
  +#include <kxmlcore/Assertions.h>
  +
   using namespace DOM;
   using namespace HTMLNames;
   
  @@ -83,6 +86,9 @@
       virtual bool isWaitingForScripts() const;
       virtual void stopParsing();
   
  +    void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; }
  +    bool isXHTMLDocument() const { return m_isXHTMLDocument; }
  +
   #ifdef KHTML_XSLT
       void setTransformSource(DocumentImpl* doc);
   #endif
  @@ -125,6 +131,7 @@
   
       bool m_sawError;
       bool m_sawXSLTransform;
  +    bool m_isXHTMLDocument;
       
       int m_errorCount;
       int m_lastErrorLine;
  @@ -607,18 +614,46 @@
       va_end(args);
   }
   
  +// Using a single global xmlEntity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
  +// a hack to avoid malloc/free. Sharing one global like this could cause trouble
  +// if libxml implementation details were to change.
  +static xmlChar sharedXHTMLEntityResult[5] = {0,0,0,0,0};
  +static xmlEntity sharedXHTMLEntity = {
  +    0, XML_ENTITY_DECL, 0, 0, 0, 0, 0, 0, 0, 
  +    sharedXHTMLEntityResult, sharedXHTMLEntityResult, 0,
  +    XML_INTERNAL_PREDEFINED_ENTITY, 0, 0, 0, 0, 0
  +};
  +
  +static xmlEntityPtr getXHTMLEntity(const xmlChar *name)
  +{
  +    const char *namePosition = (const char *)name;
  +    const Entity *e = findEntity(namePosition, strlen(namePosition));
  +    if (!e)
  +        return 0;
  +
  +    QCString value = QString(QChar(e->code)).utf8();
  +    assert(value.length() < 5);
  +    sharedXHTMLEntity.length = value.length();
  +    sharedXHTMLEntity.name = name;
  +    memcpy(sharedXHTMLEntityResult, value.data(), sharedXHTMLEntity.length);
  +
  +    return &sharedXHTMLEntity;
  +}
  +
   static xmlEntityPtr getEntityHandler(void *closure, const xmlChar *name)
   {
       xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
       xmlEntityPtr ent = xmlGetPredefinedEntity(name);
  -    if(ent)
  +    if (ent)
           return ent;
   
  -    // Work around a libxml SAX2 bug that causes charactersHandler to be called twice.
  -    bool inAttr = ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE;
       ent = xmlGetDocEntity(ctxt->myDoc, name);
  +    if (!ent && getTokenizer(closure)->isXHTMLDocument())
  +        ent = getXHTMLEntity(name);
  +
  +    // Work around a libxml SAX2 bug that causes charactersHandler to be called twice.
       if (ent)
  -        ctxt->replaceEntities = inAttr || (ent->etype != XML_INTERNAL_GENERAL_ENTITY);
  +        ctxt->replaceEntities = (ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) || (ent->etype != XML_INTERNAL_GENERAL_ENTITY);
       
       return ent;
   }
  @@ -629,6 +664,12 @@
       xmlSAX2InternalSubset(closure, name, externalID, systemID);
   }
   
  +static void externalSubsetHandler(void *closure, const xmlChar *name, const xmlChar *externalId, const xmlChar *systemId)
  +{
  +    if (toQString(name).contains("html"))
  +        getTokenizer(closure)->setIsXHTMLDocument(true);
  +}
  +
   void XMLTokenizer::finish()
   {
       if (m_xmlCode.isEmpty())
  @@ -648,6 +689,7 @@
       sax.getEntity = getEntityHandler;
       sax.startDocument = xmlSAX2StartDocument;
       sax.internalSubset = internalSubsetHandler;
  +    sax.externalSubset = externalSubsetHandler;
       sax.entityDecl = xmlSAX2EntityDecl;
       sax.initialized = XML_SAX2_MAGIC;
       
  
  
  



More information about the webkit-changes mailing list