[webkit-changes] cvs commit: WebCore/khtml/xml dom_docimpl.cpp dom_docimpl.h dom_elementimpl.cpp

Eric eseidel at opensource.apple.com
Mon Oct 24 00:24:36 PDT 2005


eseidel     05/10/24 00:24:36

  Modified:    .        ChangeLog
               khtml/xml dom_docimpl.cpp dom_docimpl.h dom_elementimpl.cpp
  Log:
  Bug #: 5317
  Submitted by: darin
  Reviewed by: hyatt
          Fix Qualified Name parsing to match spec.
          xhtml dom tests cover these cases.
          http://bugzilla.opendarwin.org/show_bug.cgi?id=5317
  
          * khtml/xml/dom_docimpl.cpp:
          (isValidNameStart):
          (isValidNamePart):
          (qualifiedNameIsMalformed):
          (DOMImplementationImpl::createDocumentType):
          (DOMImplementationImpl::createDocument):
          (DocumentImpl::createElementNS):
          (DocumentImpl::isValidName):
          (DocumentImpl::parseQualifiedName):
          * khtml/xml/dom_docimpl.h:
          * khtml/xml/dom_elementimpl.cpp:
          (ElementImpl::setAttributeNS):
  
  Revision  Changes    Path
  1.268     +21 -0     WebCore/ChangeLog
  
  Index: ChangeLog
  ===================================================================
  RCS file: /cvs/root/WebCore/ChangeLog,v
  retrieving revision 1.267
  retrieving revision 1.268
  diff -u -r1.267 -r1.268
  --- ChangeLog	24 Oct 2005 06:55:17 -0000	1.267
  +++ ChangeLog	24 Oct 2005 07:24:33 -0000	1.268
  @@ -1,3 +1,24 @@
  +2005-10-24  Darin Adler  <darin at apple.com>
  +
  +        Reviewed by hyatt.
  +
  +        Fix Qualified Name parsing to match spec.
  +        xhtml dom tests cover these cases.
  +        http://bugzilla.opendarwin.org/show_bug.cgi?id=5317
  +
  +        * khtml/xml/dom_docimpl.cpp:
  +        (isValidNameStart):
  +        (isValidNamePart):
  +        (qualifiedNameIsMalformed):
  +        (DOMImplementationImpl::createDocumentType):
  +        (DOMImplementationImpl::createDocument):
  +        (DocumentImpl::createElementNS):
  +        (DocumentImpl::isValidName):
  +        (DocumentImpl::parseQualifiedName):
  +        * khtml/xml/dom_docimpl.h:
  +        * khtml/xml/dom_elementimpl.cpp:
  +        (ElementImpl::setAttributeNS):
  +
   2005-10-23  Julien Palmas <julien.palmas at gmail.com>
   
           Reviewed by eseidel.
  
  
  
  1.261     +143 -55   WebCore/khtml/xml/dom_docimpl.cpp
  
  Index: dom_docimpl.cpp
  ===================================================================
  RCS file: /cvs/root/WebCore/khtml/xml/dom_docimpl.cpp,v
  retrieving revision 1.260
  retrieving revision 1.261
  diff -u -r1.260 -r1.261
  --- dom_docimpl.cpp	10 Oct 2005 23:48:53 -0000	1.260
  +++ dom_docimpl.cpp	24 Oct 2005 07:24:35 -0000	1.261
  @@ -111,20 +111,86 @@
   
   DOMImplementationImpl *DOMImplementationImpl::m_instance = 0;
   
  -static bool qualifiedNameIsValid(const DOMString &qualifiedName)
  +// DOM Level 2 says (letters added):
  +//
  +// a) Name start characters must have one of the categories Ll, Lu, Lo, Lt, Nl.
  +// b) Name characters other than Name-start characters must have one of the categories Mc, Me, Mn, Lm, or Nd.
  +// c) Characters in the compatibility area (i.e. with character code greater than #xF900 and less than #xFFFE) are not allowed in XML names.
  +// d) Characters which have a font or compatibility decomposition (i.e. those with a "compatibility formatting tag" in field 5 of the database -- marked by field 5 beginning with a "<") are not allowed.
  +// e) The following characters are treated as name-start characters rather than name characters, because the property file classifies them as Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.
  +// f) Characters #x20DD-#x20E0 are excluded (in accordance with Unicode, section 5.14).
  +// g) Character #x00B7 is classified as an extender, because the property list so identifies it.
  +// h) Character #x0387 is added as a name character, because #x00B7 is its canonical equivalent.
  +// i) Characters ':' and '_' are allowed as name-start characters.
  +// j) Characters '-' and '.' are allowed as name characters.
  +//
  +// It also contains complete tables. If we decide it's better, we could include those instead of the following code.
  +
  +static inline bool isValidNameStart(UChar32 c)
   {
  -    // Not mentioned in spec: empty qualified names are not valid.
  -    if (qualifiedName.isEmpty())
  +    // rule (e) above
  +    if ((c >= 0x02BB && c <= 0x02C1) || c == 0x559 || c == 0x6E5 || c == 0x6E6)
  +        return true;
  +
  +    // rule (i) above
  +    if (c == ':' || c == '_')
  +        return true;
  +
  +    // rules (a) and (f) above
  +    const uint32_t nameStartMask = U_GC_LL_MASK | U_GC_LU_MASK | U_GC_LO_MASK | U_GC_LT_MASK | U_GC_NL_MASK;
  +    if (!(U_GET_GC_MASK(c) & nameStartMask))
  +        return false;
  +
  +    // rule (c) above
  +    if (c >= 0xF900 && c < 0xFFFE)
           return false;
  -    // FIXME: Check for illegal characters.
  -    // FIXME: Merge/reconcile with DocumentImpl::isValidName.
  +
  +    // rule (d) above
  +    UDecompositionType decompType = static_cast<UDecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE));
  +    if (decompType == U_DT_FONT || decompType == U_DT_COMPAT)
  +        return false;
  +
       return true;
   }
   
  -static bool qualifiedNameIsMalformed(const DOMString &qualifiedName)
  +static inline bool isValidNamePart(UChar32 c)
  +{
  +    // rules (a), (e), and (i) above
  +    if (isValidNameStart(c))
  +        return true;
  +
  +    // rules (g) and (h) above
  +    if (c == 0x00B7 || c == 0x0387)
  +        return true;
  +
  +    // rule (j) above
  +    if (c == '-' || c == '.')
  +        return true;
  +
  +    // rules (b) and (f) above
  +    const uint32_t otherNamePartMask = U_GC_MC_MASK | U_GC_ME_MASK | U_GC_MN_MASK | U_GC_LM_MASK | U_GC_ND_MASK;
  +    if (!(U_GET_GC_MASK(c) & otherNamePartMask))
  +        return false;
  +
  +    // rule (c) above
  +    if (c >= 0xF900 && c < 0xFFFE)
  +        return false;
  +
  +    // rule (d) above
  +    UDecompositionType decompType = static_cast<UDecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE));
  +    if (decompType == U_DT_FONT || decompType == U_DT_COMPAT)
  +        return false;
  +
  +    return true;
  +}
  +
  +// FIXME: An implementation of this is still waiting for me to understand the distinction between
  +// a "malformed" qualified name and one with bad characters in it. For example, is a second colon
  +// an illegal character or a malformed qualified name? This will determine both what parameters
  +// this function needs to take and exactly what it will do. Should also be exported so that
  +// ElementImpl can use it too.
  +static bool qualifiedNameIsMalformed(const DOMString &)
   {
  -    assert(qualifiedNameIsValid(qualifiedName));
  -    // FIXME: Implement this check.
       return false;
   }
   
  @@ -166,7 +232,8 @@
       }
   
       // INVALID_CHARACTER_ERR: Raised if the specified qualified name contains an illegal character.
  -    if (!qualifiedNameIsValid(qualifiedName)) {
  +    DOMString prefix, localName;
  +    if (!DocumentImpl::parseQualifiedName(qualifiedName, prefix, localName)) {
           exceptioncode = DOMException::INVALID_CHARACTER_ERR;
           return 0;
       }
  @@ -198,7 +265,8 @@
       }
   
       // INVALID_CHARACTER_ERR: Raised if the specified qualified name contains an illegal character.
  -    if (!qualifiedNameIsValid(qualifiedName)) {
  +    DOMString prefix, localName;
  +    if (!DocumentImpl::parseQualifiedName(qualifiedName, prefix, localName)) {
           exceptioncode = DOMException::INVALID_CHARACTER_ERR;
           return 0;
       }
  @@ -649,31 +717,23 @@
       return 0;
   }
   
  -ElementImpl *DocumentImpl::createElementNS(const DOMString &_namespaceURI, const DOMString &_qualifiedName, int &exceptioncode)
  +ElementImpl *DocumentImpl::createElementNS(const DOMString &_namespaceURI, const DOMString &qualifiedName, int &exceptioncode)
   {
  -    // Split the name.
  -    int exceptionCode = 0;
  -    ElementImpl* e = 0;
  +    // FIXME: We'd like a faster code path that skips this check for calls from inside the engine where the name is known to be valid.
       DOMString prefix, localName;
  -    int colonPos = _qualifiedName.find(':');
  -    if (colonPos >= 0) {
  -        prefix = _qualifiedName.substring(0, colonPos);
  -        localName = _qualifiedName.substring(colonPos+1, _qualifiedName.length() - colonPos);
  +    if (!parseQualifiedName(qualifiedName, prefix, localName)) {
  +        exceptioncode = DOMException::INVALID_CHARACTER_ERR;
  +        return 0;
       }
  -    else
  -        localName = _qualifiedName;
  +
  +    ElementImpl *e = 0;
       
       // FIXME: Use registered namespaces and look up in a hash to find the right factory.
       if (_namespaceURI == xhtmlNamespaceURI) {
  -        // FIXME: Really should only be done from the public DOM API.  Internal callers know the name is valid.
  -        if (!isValidName(localName)) {
  -            exceptioncode = DOMException::INVALID_CHARACTER_ERR;
  -            return 0;
  -        }
           e = HTMLElementFactory::createHTMLElement(AtomicString(localName), this, 0, false);
           if (e && !prefix.isNull()) {
  -            e->setPrefix(AtomicString(prefix), exceptionCode);
  -            if (exceptionCode)
  +            e->setPrefix(AtomicString(prefix), exceptioncode);
  +            if (exceptioncode)
                   return 0;
           }
       }
  @@ -2561,39 +2621,67 @@
   
   bool DocumentImpl::isValidName(const DOMString &name)
   {
  -    // DOM Level 2 says:
  -    //
  -    // Name start characters must have one of the categories Ll, Lu, Lo, Lt, Nl.
  -    // Name characters other than Name-start characters must have one of the categories Mc, Me, Mn, Lm, or Nd.
  -    // Characters in the compatibility area (i.e. with character code greater than #xF900 and less than #xFFFE) are not allowed in XML names.
  -    // Characters which have a font or compatibility decomposition (i.e. those with a "compatibility formatting tag" in field 5 of the database -- marked by field 5 beginning with a "<") are not allowed.
  -    // The following characters are treated as name-start characters rather than name characters, because the property file classifies them as Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.
  -    // Characters #x20DD-#x20E0 are excluded (in accordance with Unicode, section 5.14).
  -    // Character #x00B7 is classified as an extender, because the property list so identifies it.
  -    // Character #x0387 is added as a name character, because #x00B7 is its canonical equivalent.
  -    // Characters ':' and '_' are allowed as name-start characters.
  -    // Characters '-' and '.' are allowed as name characters.
  -    //
  -    // FIXME: Implement the above!
  -
  -    static const char validFirstCharacter[] = "ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz";
  -    static const char validSubsequentCharacter[] = "ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz0123456789-_:.";
  -    const unsigned length = name.length();
  +    const UChar *s = reinterpret_cast<const UChar *>(name.unicode());
  +    unsigned length = name.length();
  +
       if (length == 0)
           return false;
  -    const QChar * const characters = name.unicode();
  -    const char fc = characters[0];
  -    if (!fc)
  -        return false;
  -    if (strchr(validFirstCharacter, fc) == 0)
  +
  +    unsigned i = 0;
  +
  +    UChar32 c;
  +    U16_NEXT(s, i, length, c)
  +    if (!isValidNameStart(c))
           return false;
  -    for (unsigned i = 1; i < length; ++i) {
  -        const char sc = characters[i];
  -        if (!sc)
  -            return false;
  -        if (strchr(validSubsequentCharacter, sc) == 0)
  +
  +    while (i < length) {
  +        U16_NEXT(s, i, length, c)
  +        if (!isValidNamePart(c))
               return false;
       }
  +
  +    return true;
  +}
  +
  +bool DocumentImpl::parseQualifiedName(const DOMString &qualifiedName, DOMString &prefix, DOMString &localName)
  +{
  +    unsigned length = qualifiedName.length();
  +
  +    if (length == 0)
  +        return false;
  +
  +    bool nameStart = true;
  +    bool sawColon = false;
  +    int colonPos = 0;
  +
  +    const QChar *s = qualifiedName.unicode();
  +    for (unsigned i = 0; i < length; ) {
  +        UChar32 c;
  +        U16_NEXT(s, i, length, c)
  +        if (c == ':') {
  +            if (sawColon)
  +                return false; // multiple colons: not allowed
  +            nameStart = true;
  +            sawColon = true;
  +            colonPos = i - 1;
  +        } else if (nameStart) {
  +            if (!isValidNameStart(c))
  +                return false;
  +            nameStart = false;
  +        } else {
  +            if (!isValidNamePart(c))
  +                return false;
  +        }
  +    }
  +
  +    if (!sawColon) {
  +        prefix = DOMString();
  +        localName = qualifiedName.copy();
  +    } else {
  +        prefix = qualifiedName.substring(0, colonPos);
  +        localName = qualifiedName.substring(colonPos + 1, length - (colonPos + 1));
  +    }
  +
       return true;
   }
   
  
  
  
  1.131     +5 -0      WebCore/khtml/xml/dom_docimpl.h
  
  Index: dom_docimpl.h
  ===================================================================
  RCS file: /cvs/root/WebCore/khtml/xml/dom_docimpl.h,v
  retrieving revision 1.130
  retrieving revision 1.131
  diff -u -r1.130 -r1.131
  --- dom_docimpl.h	5 Oct 2005 10:31:12 -0000	1.130
  +++ dom_docimpl.h	24 Oct 2005 07:24:35 -0000	1.131
  @@ -516,6 +516,11 @@
       // To get this right for all the XML cases, we probably have to improve this or move it
       // and make it sensitive to the type of document.
       static bool isValidName(const DOMString &);
  +
  +    // The following breaks a qualified name into a prefix and a local name.
  +    // It also does a validity check, and returns false if the qualified name is invalid
  +    // (empty string, invalid characters, or more than one colon).
  +    static bool parseQualifiedName(const DOMString &qualifiedName, DOMString &prefix, DOMString &localName);
       
       void addElementById(const DOMString &elementId, ElementImpl *element);
       void removeElementById(const DOMString &elementId, ElementImpl *element);
  
  
  
  1.86      +4 -14     WebCore/khtml/xml/dom_elementimpl.cpp
  
  Index: dom_elementimpl.cpp
  ===================================================================
  RCS file: /cvs/root/WebCore/khtml/xml/dom_elementimpl.cpp,v
  retrieving revision 1.85
  retrieving revision 1.86
  diff -u -r1.85 -r1.86
  --- dom_elementimpl.cpp	3 Oct 2005 21:12:52 -0000	1.85
  +++ dom_elementimpl.cpp	24 Oct 2005 07:24:35 -0000	1.86
  @@ -761,26 +761,16 @@
   
   void ElementImpl::setAttributeNS(const DOMString &namespaceURI, const DOMString &qualifiedName, const DOMString &value, int &exception)
   {
  -    DOMString localName = qualifiedName;
  -    DOMString prefix;
  -    int colonpos;
  -    if ((colonpos = qualifiedName.find(':')) >= 0) {
  -        prefix = qualifiedName.copy();
  -        localName = qualifiedName.copy();
  -        prefix.truncate(colonpos);
  -        localName.remove(0, colonpos+1);
  -    }
  -
  -    if (!DocumentImpl::isValidName(localName)) {
  +    DOMString prefix, localName;
  +    if (!DocumentImpl::parseQualifiedName(qualifiedName, prefix, localName)) {
           exception = DOMException::INVALID_CHARACTER_ERR;
           return;
       }
   
       if (getDocument()->isHTMLDocument())
           localName = localName.lower();
  -        
  -    setAttribute(QualifiedName(prefix.impl(), localName.impl(),
  -                               namespaceURI.impl()), value.impl(), exception);
  +
  +    setAttribute(QualifiedName(prefix.impl(), localName.impl(), namespaceURI.impl()), value.impl(), exception);
   }
   
   void ElementImpl::removeAttributeNS(const DOMString &namespaceURI, const DOMString &localName, int &exception)
  
  
  



More information about the webkit-changes mailing list