[webkit-changes] cvs commit: WebCore/khtml/xml dom_docimpl.cpp dom_docimpl.h dom_elementimpl.cpp
Eric
eseidel at opensource.apple.com
Mon Oct 24 00:24:36 PDT 2005
eseidel 05/10/24 00:24:36
Modified: . ChangeLog
khtml/xml dom_docimpl.cpp dom_docimpl.h dom_elementimpl.cpp
Log:
Bug #: 5317
Submitted by: darin
Reviewed by: hyatt
Fix Qualified Name parsing to match spec.
xhtml dom tests cover these cases.
http://bugzilla.opendarwin.org/show_bug.cgi?id=5317
* khtml/xml/dom_docimpl.cpp:
(isValidNameStart):
(isValidNamePart):
(qualifiedNameIsMalformed):
(DOMImplementationImpl::createDocumentType):
(DOMImplementationImpl::createDocument):
(DocumentImpl::createElementNS):
(DocumentImpl::isValidName):
(DocumentImpl::parseQualifiedName):
* khtml/xml/dom_docimpl.h:
* khtml/xml/dom_elementimpl.cpp:
(ElementImpl::setAttributeNS):
Revision Changes Path
1.268 +21 -0 WebCore/ChangeLog
Index: ChangeLog
===================================================================
RCS file: /cvs/root/WebCore/ChangeLog,v
retrieving revision 1.267
retrieving revision 1.268
diff -u -r1.267 -r1.268
--- ChangeLog 24 Oct 2005 06:55:17 -0000 1.267
+++ ChangeLog 24 Oct 2005 07:24:33 -0000 1.268
@@ -1,3 +1,24 @@
+2005-10-24 Darin Adler <darin at apple.com>
+
+ Reviewed by hyatt.
+
+ Fix Qualified Name parsing to match spec.
+ xhtml dom tests cover these cases.
+ http://bugzilla.opendarwin.org/show_bug.cgi?id=5317
+
+ * khtml/xml/dom_docimpl.cpp:
+ (isValidNameStart):
+ (isValidNamePart):
+ (qualifiedNameIsMalformed):
+ (DOMImplementationImpl::createDocumentType):
+ (DOMImplementationImpl::createDocument):
+ (DocumentImpl::createElementNS):
+ (DocumentImpl::isValidName):
+ (DocumentImpl::parseQualifiedName):
+ * khtml/xml/dom_docimpl.h:
+ * khtml/xml/dom_elementimpl.cpp:
+ (ElementImpl::setAttributeNS):
+
2005-10-23 Julien Palmas <julien.palmas at gmail.com>
Reviewed by eseidel.
1.261 +143 -55 WebCore/khtml/xml/dom_docimpl.cpp
Index: dom_docimpl.cpp
===================================================================
RCS file: /cvs/root/WebCore/khtml/xml/dom_docimpl.cpp,v
retrieving revision 1.260
retrieving revision 1.261
diff -u -r1.260 -r1.261
--- dom_docimpl.cpp 10 Oct 2005 23:48:53 -0000 1.260
+++ dom_docimpl.cpp 24 Oct 2005 07:24:35 -0000 1.261
@@ -111,20 +111,86 @@
DOMImplementationImpl *DOMImplementationImpl::m_instance = 0;
-static bool qualifiedNameIsValid(const DOMString &qualifiedName)
+// DOM Level 2 says (letters added):
+//
+// a) Name start characters must have one of the categories Ll, Lu, Lo, Lt, Nl.
+// b) Name characters other than Name-start characters must have one of the categories Mc, Me, Mn, Lm, or Nd.
+// c) Characters in the compatibility area (i.e. with character code greater than #xF900 and less than #xFFFE) are not allowed in XML names.
+// d) Characters which have a font or compatibility decomposition (i.e. those with a "compatibility formatting tag" in field 5 of the database -- marked by field 5 beginning with a "<") are not allowed.
+// e) The following characters are treated as name-start characters rather than name characters, because the property file classifies them as Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.
+// f) Characters #x20DD-#x20E0 are excluded (in accordance with Unicode, section 5.14).
+// g) Character #x00B7 is classified as an extender, because the property list so identifies it.
+// h) Character #x0387 is added as a name character, because #x00B7 is its canonical equivalent.
+// i) Characters ':' and '_' are allowed as name-start characters.
+// j) Characters '-' and '.' are allowed as name characters.
+//
+// It also contains complete tables. If we decide it's better, we could include those instead of the following code.
+
+static inline bool isValidNameStart(UChar32 c)
{
- // Not mentioned in spec: empty qualified names are not valid.
- if (qualifiedName.isEmpty())
+ // rule (e) above
+ if ((c >= 0x02BB && c <= 0x02C1) || c == 0x559 || c == 0x6E5 || c == 0x6E6)
+ return true;
+
+ // rule (i) above
+ if (c == ':' || c == '_')
+ return true;
+
+ // rules (a) and (f) above
+ const uint32_t nameStartMask = U_GC_LL_MASK | U_GC_LU_MASK | U_GC_LO_MASK | U_GC_LT_MASK | U_GC_NL_MASK;
+ if (!(U_GET_GC_MASK(c) & nameStartMask))
+ return false;
+
+ // rule (c) above
+ if (c >= 0xF900 && c < 0xFFFE)
return false;
- // FIXME: Check for illegal characters.
- // FIXME: Merge/reconcile with DocumentImpl::isValidName.
+
+ // rule (d) above
+ UDecompositionType decompType = static_cast<UDecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE));
+ if (decompType == U_DT_FONT || decompType == U_DT_COMPAT)
+ return false;
+
return true;
}
-static bool qualifiedNameIsMalformed(const DOMString &qualifiedName)
+static inline bool isValidNamePart(UChar32 c)
+{
+ // rules (a), (e), and (i) above
+ if (isValidNameStart(c))
+ return true;
+
+ // rules (g) and (h) above
+ if (c == 0x00B7 || c == 0x0387)
+ return true;
+
+ // rule (j) above
+ if (c == '-' || c == '.')
+ return true;
+
+ // rules (b) and (f) above
+ const uint32_t otherNamePartMask = U_GC_MC_MASK | U_GC_ME_MASK | U_GC_MN_MASK | U_GC_LM_MASK | U_GC_ND_MASK;
+ if (!(U_GET_GC_MASK(c) & otherNamePartMask))
+ return false;
+
+ // rule (c) above
+ if (c >= 0xF900 && c < 0xFFFE)
+ return false;
+
+ // rule (d) above
+ UDecompositionType decompType = static_cast<UDecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE));
+ if (decompType == U_DT_FONT || decompType == U_DT_COMPAT)
+ return false;
+
+ return true;
+}
+
+// FIXME: An implementation of this is still waiting for me to understand the distinction between
+// a "malformed" qualified name and one with bad characters in it. For example, is a second colon
+// an illegal character or a malformed qualified name? This will determine both what parameters
+// this function needs to take and exactly what it will do. Should also be exported so that
+// ElementImpl can use it too.
+static bool qualifiedNameIsMalformed(const DOMString &)
{
- assert(qualifiedNameIsValid(qualifiedName));
- // FIXME: Implement this check.
return false;
}
@@ -166,7 +232,8 @@
}
// INVALID_CHARACTER_ERR: Raised if the specified qualified name contains an illegal character.
- if (!qualifiedNameIsValid(qualifiedName)) {
+ DOMString prefix, localName;
+ if (!DocumentImpl::parseQualifiedName(qualifiedName, prefix, localName)) {
exceptioncode = DOMException::INVALID_CHARACTER_ERR;
return 0;
}
@@ -198,7 +265,8 @@
}
// INVALID_CHARACTER_ERR: Raised if the specified qualified name contains an illegal character.
- if (!qualifiedNameIsValid(qualifiedName)) {
+ DOMString prefix, localName;
+ if (!DocumentImpl::parseQualifiedName(qualifiedName, prefix, localName)) {
exceptioncode = DOMException::INVALID_CHARACTER_ERR;
return 0;
}
@@ -649,31 +717,23 @@
return 0;
}
-ElementImpl *DocumentImpl::createElementNS(const DOMString &_namespaceURI, const DOMString &_qualifiedName, int &exceptioncode)
+ElementImpl *DocumentImpl::createElementNS(const DOMString &_namespaceURI, const DOMString &qualifiedName, int &exceptioncode)
{
- // Split the name.
- int exceptionCode = 0;
- ElementImpl* e = 0;
+ // FIXME: We'd like a faster code path that skips this check for calls from inside the engine where the name is known to be valid.
DOMString prefix, localName;
- int colonPos = _qualifiedName.find(':');
- if (colonPos >= 0) {
- prefix = _qualifiedName.substring(0, colonPos);
- localName = _qualifiedName.substring(colonPos+1, _qualifiedName.length() - colonPos);
+ if (!parseQualifiedName(qualifiedName, prefix, localName)) {
+ exceptioncode = DOMException::INVALID_CHARACTER_ERR;
+ return 0;
}
- else
- localName = _qualifiedName;
+
+ ElementImpl *e = 0;
// FIXME: Use registered namespaces and look up in a hash to find the right factory.
if (_namespaceURI == xhtmlNamespaceURI) {
- // FIXME: Really should only be done from the public DOM API. Internal callers know the name is valid.
- if (!isValidName(localName)) {
- exceptioncode = DOMException::INVALID_CHARACTER_ERR;
- return 0;
- }
e = HTMLElementFactory::createHTMLElement(AtomicString(localName), this, 0, false);
if (e && !prefix.isNull()) {
- e->setPrefix(AtomicString(prefix), exceptionCode);
- if (exceptionCode)
+ e->setPrefix(AtomicString(prefix), exceptioncode);
+ if (exceptioncode)
return 0;
}
}
@@ -2561,39 +2621,67 @@
bool DocumentImpl::isValidName(const DOMString &name)
{
- // DOM Level 2 says:
- //
- // Name start characters must have one of the categories Ll, Lu, Lo, Lt, Nl.
- // Name characters other than Name-start characters must have one of the categories Mc, Me, Mn, Lm, or Nd.
- // Characters in the compatibility area (i.e. with character code greater than #xF900 and less than #xFFFE) are not allowed in XML names.
- // Characters which have a font or compatibility decomposition (i.e. those with a "compatibility formatting tag" in field 5 of the database -- marked by field 5 beginning with a "<") are not allowed.
- // The following characters are treated as name-start characters rather than name characters, because the property file classifies them as Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.
- // Characters #x20DD-#x20E0 are excluded (in accordance with Unicode, section 5.14).
- // Character #x00B7 is classified as an extender, because the property list so identifies it.
- // Character #x0387 is added as a name character, because #x00B7 is its canonical equivalent.
- // Characters ':' and '_' are allowed as name-start characters.
- // Characters '-' and '.' are allowed as name characters.
- //
- // FIXME: Implement the above!
-
- static const char validFirstCharacter[] = "ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz";
- static const char validSubsequentCharacter[] = "ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz0123456789-_:.";
- const unsigned length = name.length();
+ const UChar *s = reinterpret_cast<const UChar *>(name.unicode());
+ unsigned length = name.length();
+
if (length == 0)
return false;
- const QChar * const characters = name.unicode();
- const char fc = characters[0];
- if (!fc)
- return false;
- if (strchr(validFirstCharacter, fc) == 0)
+
+ unsigned i = 0;
+
+ UChar32 c;
+ U16_NEXT(s, i, length, c)
+ if (!isValidNameStart(c))
return false;
- for (unsigned i = 1; i < length; ++i) {
- const char sc = characters[i];
- if (!sc)
- return false;
- if (strchr(validSubsequentCharacter, sc) == 0)
+
+ while (i < length) {
+ U16_NEXT(s, i, length, c)
+ if (!isValidNamePart(c))
return false;
}
+
+ return true;
+}
+
+bool DocumentImpl::parseQualifiedName(const DOMString &qualifiedName, DOMString &prefix, DOMString &localName)
+{
+ unsigned length = qualifiedName.length();
+
+ if (length == 0)
+ return false;
+
+ bool nameStart = true;
+ bool sawColon = false;
+ int colonPos = 0;
+
+ const QChar *s = qualifiedName.unicode();
+ for (unsigned i = 0; i < length; ) {
+ UChar32 c;
+ U16_NEXT(s, i, length, c)
+ if (c == ':') {
+ if (sawColon)
+ return false; // multiple colons: not allowed
+ nameStart = true;
+ sawColon = true;
+ colonPos = i - 1;
+ } else if (nameStart) {
+ if (!isValidNameStart(c))
+ return false;
+ nameStart = false;
+ } else {
+ if (!isValidNamePart(c))
+ return false;
+ }
+ }
+
+ if (!sawColon) {
+ prefix = DOMString();
+ localName = qualifiedName.copy();
+ } else {
+ prefix = qualifiedName.substring(0, colonPos);
+ localName = qualifiedName.substring(colonPos + 1, length - (colonPos + 1));
+ }
+
return true;
}
1.131 +5 -0 WebCore/khtml/xml/dom_docimpl.h
Index: dom_docimpl.h
===================================================================
RCS file: /cvs/root/WebCore/khtml/xml/dom_docimpl.h,v
retrieving revision 1.130
retrieving revision 1.131
diff -u -r1.130 -r1.131
--- dom_docimpl.h 5 Oct 2005 10:31:12 -0000 1.130
+++ dom_docimpl.h 24 Oct 2005 07:24:35 -0000 1.131
@@ -516,6 +516,11 @@
// To get this right for all the XML cases, we probably have to improve this or move it
// and make it sensitive to the type of document.
static bool isValidName(const DOMString &);
+
+ // The following breaks a qualified name into a prefix and a local name.
+ // It also does a validity check, and returns false if the qualified name is invalid
+ // (empty string or invalid characters).
+ static bool parseQualifiedName(const DOMString &qualifiedName, DOMString &prefix, DOMString &localName);
void addElementById(const DOMString &elementId, ElementImpl *element);
void removeElementById(const DOMString &elementId, ElementImpl *element);
1.86 +4 -14 WebCore/khtml/xml/dom_elementimpl.cpp
Index: dom_elementimpl.cpp
===================================================================
RCS file: /cvs/root/WebCore/khtml/xml/dom_elementimpl.cpp,v
retrieving revision 1.85
retrieving revision 1.86
diff -u -r1.85 -r1.86
--- dom_elementimpl.cpp 3 Oct 2005 21:12:52 -0000 1.85
+++ dom_elementimpl.cpp 24 Oct 2005 07:24:35 -0000 1.86
@@ -761,26 +761,16 @@
void ElementImpl::setAttributeNS(const DOMString &namespaceURI, const DOMString &qualifiedName, const DOMString &value, int &exception)
{
- DOMString localName = qualifiedName;
- DOMString prefix;
- int colonpos;
- if ((colonpos = qualifiedName.find(':')) >= 0) {
- prefix = qualifiedName.copy();
- localName = qualifiedName.copy();
- prefix.truncate(colonpos);
- localName.remove(0, colonpos+1);
- }
-
- if (!DocumentImpl::isValidName(localName)) {
+ DOMString prefix, localName;
+ if (!DocumentImpl::parseQualifiedName(qualifiedName, prefix, localName)) {
exception = DOMException::INVALID_CHARACTER_ERR;
return;
}
if (getDocument()->isHTMLDocument())
localName = localName.lower();
-
- setAttribute(QualifiedName(prefix.impl(), localName.impl(),
- namespaceURI.impl()), value.impl(), exception);
+
+ setAttribute(QualifiedName(prefix.impl(), localName.impl(), namespaceURI.impl()), value.impl(), exception);
}
void ElementImpl::removeAttributeNS(const DOMString &namespaceURI, const DOMString &localName, int &exception)
More information about the webkit-changes mailing list