[webkit-changes] cvs commit: WebCore/layout-tests/fast/encoding invalid-UTF-8-expected.txt invalid-UTF-8.html

Darin darin at opensource.apple.com
Fri Jun 17 09:41:06 PDT 2005


darin       05/06/17 09:41:05

  Modified:    .        ChangeLog
               kwq      KWQTextCodec.mm
  Added:       layout-tests/fast/encoding invalid-UTF-8-expected.txt
                        invalid-UTF-8.html
  Log:
          Reviewed by John.
  
          - fixed http://bugzilla.opendarwin.org/show_bug.cgi?id=3556
            black diamond question mark shown for invalid UTF-8 sequences
  
          Test cases added:
          * layout-tests/fast/encoding/invalid-UTF-8-expected.txt: Added.
          * layout-tests/fast/encoding/invalid-UTF-8.html: Added.
  
          * kwq/KWQTextCodec.mm:
          (unwanted): Added. Returns true for BOM, replacement, and null characters.
          (KWQTextDecoder::appendOmittingUnwanted): Renamed from appendOmittingNullsAndBOMs and
          changed to use the new "unwanted" function, which causes it to omit replacement characters.
          (KWQTextDecoder::convertUsingTEC): Call append function by its new name.
  
  Revision  Changes    Path
  1.4268    +17 -0     WebCore/ChangeLog
  
  Index: ChangeLog
  ===================================================================
  RCS file: /cvs/root/WebCore/ChangeLog,v
  retrieving revision 1.4267
  retrieving revision 1.4268
  diff -u -r1.4267 -r1.4268
  --- ChangeLog	17 Jun 2005 02:13:07 -0000	1.4267
  +++ ChangeLog	17 Jun 2005 16:41:01 -0000	1.4268
  @@ -1,3 +1,20 @@
  +2005-06-17  Darin Adler  <darin at apple.com>
  +
  +        Reviewed by John.
  +
  +        - fixed http://bugzilla.opendarwin.org/show_bug.cgi?id=3556
  +          black diamond question mark shown for invalid UTF-8 sequences
  +
  +        Test cases added:
  +        * layout-tests/fast/encoding/invalid-UTF-8-expected.txt: Added.
  +        * layout-tests/fast/encoding/invalid-UTF-8.html: Added.
  +
  +        * kwq/KWQTextCodec.mm:
  +        (unwanted): Added. Returns true for BOM, replacement, and null characters.
  +        (KWQTextDecoder::appendOmittingUnwanted): Renamed from appendOmittingNullsAndBOMs and
  +        changed to use the new "unwanted" function, which causes it to omit replacement characters.
  +        (KWQTextDecoder::convertUsingTEC): Call append function by its new name.
  +
   2005-06-16  Justin Garcia  <justin.garcia at apple.com>
   
   	Added a few debugging methods to highlight Selections, VisiblePositions and Nodes in the DOM tree.
  
  
  
  1.50      +23 -6     WebCore/kwq/KWQTextCodec.mm
  
  Index: KWQTextCodec.mm
  ===================================================================
  RCS file: /cvs/root/WebCore/kwq/KWQTextCodec.mm,v
  retrieving revision 1.49
  retrieving revision 1.50
  diff -u -r1.49 -r1.50
  --- KWQTextCodec.mm	14 Dec 2004 00:10:18 -0000	1.49
  +++ KWQTextCodec.mm	17 Jun 2005 16:41:05 -0000	1.50
  @@ -28,6 +28,7 @@
   #import "KWQAssertions.h"
   #import "KWQCharsets.h"
   
  +const UniChar replacementCharacter = 0xFFFD;
   const UniChar BOM = 0xFEFF;
   
   class KWQTextDecoder : public QTextDecoder {
  @@ -48,7 +49,7 @@
       OSStatus createTECConverter();
       OSStatus convertOneChunkUsingTEC(const unsigned char *inputBuffer, int inputBufferLength, int &inputLength,
           void *outputBuffer, int outputBufferLength, int &outputLength);
  -    static void appendOmittingNullsAndBOMs(QString &s, const UniChar *characters, int byteCount);
  +    static void appendOmittingUnwanted(QString &s, const UniChar *characters, int byteCount);
       
       KWQTextDecoder(const KWQTextDecoder &);
       KWQTextDecoder &operator=(const KWQTextDecoder &);
  @@ -356,14 +357,30 @@
       return noErr;
   }
   
  -void KWQTextDecoder::appendOmittingNullsAndBOMs(QString &s, const UniChar *characters, int byteCount)
  +// We strip NUL characters because other browsers (at least WinIE) do.
  +// We strip replacement characters because the TEC converter for UTF-8 converts
  +// invalid sequences into replacement characters, but other browsers discard them.
  +// We strip BOM characters because they can show up both at the start of content
  +// and inside content, and we never want them to end up in the decoded text.
  +static inline bool unwanted(UniChar c)
  +{
  +    switch (c) {
  +        case 0:
  +        case replacementCharacter:
  +        case BOM:
  +            return true;
  +        default:
  +            return false;
  +    }
  +}
  +
  +void KWQTextDecoder::appendOmittingUnwanted(QString &s, const UniChar *characters, int byteCount)
   {
       ASSERT(byteCount % sizeof(UniChar) == 0);
       int start = 0;
       int characterCount = byteCount / sizeof(UniChar);
       for (int i = 0; i != characterCount; ++i) {
  -        UniChar c = characters[i];
  -        if (c == 0 || c == BOM) {
  +        if (unwanted(characters[i])) {
               if (start != i) {
                   s.append(reinterpret_cast<const QChar *>(&characters[start]), i - start);
               }
  @@ -498,7 +515,7 @@
                   return QString();
           }
   
  -        appendOmittingNullsAndBOMs(result, buffer, bytesWritten);
  +        appendOmittingUnwanted(result, buffer, bytesWritten);
   
           bufferWasFull = status == kTECOutputBufferFullStatus;
       }
  @@ -506,7 +523,7 @@
       if (flush) {
           unsigned long bytesWritten = 0;
           TECFlushText(_converter, reinterpret_cast<unsigned char *>(buffer), sizeof(buffer), &bytesWritten);
  -        appendOmittingNullsAndBOMs(result, buffer, bytesWritten);
  +        appendOmittingUnwanted(result, buffer, bytesWritten);
       }
   
       // Workaround for a bug in the Text Encoding Converter (see bug 3225472).
  
  
  
  1.1                  WebCore/layout-tests/fast/encoding/invalid-UTF-8-expected.txt
  
  Index: invalid-UTF-8-expected.txt
  ===================================================================
  layer at (0,0) size 800x600
    RenderCanvas at (0,0) size 800x600
  layer at (0,0) size 800x600
    RenderBlock {HTML} at (0,0) size 800x600
      RenderBody {BODY} at (8,8) size 784x576
        RenderBlock {P} at (0,0) size 784x18
          RenderText {TEXT} at (0,0) size 721x18
            text run at (0,0) width 721: "This tests the rendering of invalid UTF-8 sequences. The way other browsers handle these is to omit them entirely."
        RenderBlock {P} at (0,34) size 784x36
          RenderText {TEXT} at (0,0) size 764x36
            text run at (0,0) width 764: "The text before should show the word \"everywhere\" right next to the word \"including\" and the word \"cube\" right next to"
            text run at (0,18) width 396: "the word \"showing\" without any visible characters in between."
        RenderBlock {HR} at (0,86) size 784x2 [border: (1px inset #000000)]
        RenderBlock {P} at (0,104) size 784x18
          RenderText {TEXT} at (0,0) size 346x18
            text run at (0,0) width 346: "everywhereincluding a 120-screen cubeshowing sports"
  
  
  
  1.1                  WebCore/layout-tests/fast/encoding/invalid-UTF-8.html
  
  Index: invalid-UTF-8.html
  ===================================================================
  <html>
  <head>
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  </head>
  <body>
  <p>This tests the rendering of invalid UTF-8 sequences. The way other browsers handle these is to omit them entirely.</p>
  <p>The text before should show the word "everywhere" right next to the word "including" and the word "cube" right next to the
  word "showing" without any visible characters in between.</p>
  <hr>
  <p>everywhere—including a 120-screen cube—showing sports</p>
  </body>
  </html>
  
  
  



More information about the webkit-changes mailing list