[webkit-changes] cvs commit: JavaScriptCore/pcre pcre_internal.h pcre_xclass.c

Geoffrey ggaren at opensource.apple.com
Sun Nov 6 12:41:09 PST 2005


ggaren      05/11/06 12:41:08

  Modified:    .        ChangeLog
               pcre     pcre_internal.h pcre_xclass.c
  Log:
          Reviewed by Darin.
  
          - Fixed http://bugzilla.opendarwin.org/show_bug.cgi?id=5571
            REGRESSION (412.5-TOT): duplicated words/sentences at
            shakespeer.sourceforge.net
  
          Our UTF16-modified PCRE didn't work with extended character classes
          (classes involving characters > 255) because it used the GETCHARINC
          macro to read through them. In UTF16 mode, GETCHARINC expects UTF16
          input, but PCRE encodes character classes in UTF8 regardless of the
          input mode of the subject string.
  
          The fix is to explicitly define GETUTF8CHARINC, and to use it,
          rather than GETCHARINC, when reading extended character classes.
  
          In UTF8 mode, we simply define GETCHARINC to be GETUTF8CHARINC.
  
          * pcre/pcre_internal.h:
          * pcre/pcre_xclass.c:
          (_pcre_xclass):
  
  Revision  Changes    Path
  1.878     +23 -0     JavaScriptCore/ChangeLog
  
  Index: ChangeLog
  ===================================================================
  RCS file: /cvs/root/JavaScriptCore/ChangeLog,v
  retrieving revision 1.877
  retrieving revision 1.878
  diff -u -r1.877 -r1.878
  --- ChangeLog	6 Nov 2005 06:20:41 -0000	1.877
  +++ ChangeLog	6 Nov 2005 20:41:07 -0000	1.878
  @@ -1,3 +1,26 @@
  +2005-11-06  Geoffrey Garen  <ggaren at apple.com>
  +
  +        Reviewed by NOBODY (OOPS!).
  +
  +        - Fixed http://bugzilla.opendarwin.org/show_bug.cgi?id=5571
  +          REGRESSION (412.5-TOT): duplicated words/sentences at 
  +          shakespeer.sourceforge.net
  +
  +        Our UTF16-modified PCRE didn't work with extended character classes
  +        (classes involving characters > 255) because it used the GETCHARINC
  +        macro to read through them. In UTF16 mode, GETCHARINC expects UTF16 
  +        input, but PCRE encodes character classes in UTF8 regardless of the
  +        input mode of the subject string.
  +
  +        The fix is to explicitly define GETUTF8CHARINC, and to use it,
  +        rather than GETCHARINC, when reading extended character classes. 
  +        
  +        In UTF8 mode, we simply define GETCHARINC to be GETUTF8CHARINC.
  +
  +        * pcre/pcre_internal.h:
  +        * pcre/pcre_xclass.c:
  +        (_pcre_xclass):
  +
   2005-11-05  Geoffrey Garen  <ggaren at apple.com>
   
           Patch by Mitz Pettel, reviewed by Maciej.
  
  
  
  1.4       +18 -13    JavaScriptCore/pcre/pcre_internal.h
  
  Index: pcre_internal.h
  ===================================================================
  RCS file: /cvs/root/JavaScriptCore/pcre/pcre_internal.h,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- pcre_internal.h	9 Sep 2005 00:51:07 -0000	1.3
  +++ pcre_internal.h	6 Nov 2005 20:41:08 -0000	1.4
  @@ -291,6 +291,23 @@
       len += gcaa; \
       }
   
  +/* Get the next UTF-8 character, advancing the pointer. This is called when we
  +know we are in UTF-8 mode. */
  +
  +#define GETUTF8CHARINC(c, eptr) \
  +c = *eptr++; \
  +if ((c & 0xc0) == 0xc0) \
  +{ \
  +  int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
  +    int gcss = 6*gcaa; \
  +      c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
  +        while (gcaa-- > 0) \
  +        { \
  +          gcss -= 6; \
  +            c |= (*eptr++ & 0x3f) << gcss; \
  +        } \
  +}
  +
   #if PCRE_UTF16
   
   #define LEAD_OFFSET (0xd800 - (0x10000 >> 10))
  @@ -368,19 +385,7 @@
   /* Get the next UTF-8 character, advancing the pointer. This is called when we
   know we are in UTF-8 mode. */
   
  -#define GETCHARINC(c, eptr) \
  -  c = *eptr++; \
  -  if ((c & 0xc0) == 0xc0) \
  -    { \
  -    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
  -    int gcss = 6*gcaa; \
  -    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
  -    while (gcaa-- > 0) \
  -      { \
  -      gcss -= 6; \
  -      c |= (*eptr++ & 0x3f) << gcss; \
  -      } \
  -    }
  +#define GETCHARINC GETUTF8CHARINC
   
   /* Get the next character, testing for UTF-8 mode, and advancing the pointer */
   
  
  
  
  1.4       +3 -3      JavaScriptCore/pcre/pcre_xclass.c
  
  Index: pcre_xclass.c
  ===================================================================
  RCS file: /cvs/root/JavaScriptCore/pcre/pcre_xclass.c,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- pcre_xclass.c	9 Sep 2005 00:51:07 -0000	1.3
  +++ pcre_xclass.c	6 Nov 2005 20:41:08 -0000	1.4
  @@ -87,13 +87,13 @@
     int x, y;
     if (t == XCL_SINGLE)
       {
  -    GETCHARINC(x, data);
  +    GETUTF8CHARINC(x, data);
       if (c == x) return !negated;
       }
     else if (t == XCL_RANGE)
       {
  -    GETCHARINC(x, data);
  -    GETCHARINC(y, data);
  +    GETUTF8CHARINC(x, data);
  +    GETUTF8CHARINC(y, data);
       if (c >= x && c <= y) return !negated;
       }
   
  
  
  



More information about the webkit-changes mailing list