[webkit-changes] cvs commit: WebCore/khtml/rendering bidi.cpp break_lines.cpp break_lines.h render_text.cpp

Sun Sep 4 00:42:32 PDT 2005

darin       05/09/04 00:42:32

  Modified:    .        ChangeLog
               khtml/rendering bidi.cpp break_lines.cpp break_lines.h
                        render_text.cpp
  Log:
          Reviewed and landed by Darin.

          - fixed <rdar://problem/3698926> so slow it feels like a hang calling UCFindTextBreak() tons of times at forum.presence-pc.com (4789)
            also http://bugzilla.opendarwin.org/show_bug.cgi?id=4789

          No test cases added because this is a performance fix. Existing test cases continue to work.

          * khtml/rendering/break_lines.h: Declare the new nextBreakablePosition and also define a new
          isBreakable function that adds an in/out "next breakable position" parameter.
          * khtml/rendering/break_lines.cpp: (khtml::nextBreakablePosition): Replaced the old isBreakable
          with this function.

          * khtml/rendering/bidi.cpp: (khtml::RenderBlock::findNextLineBreak): Call the new version of
          isBreakable that uses the previously-found "next breakable" position until we pass it rather
          than analyzing each position separately to see if we can break there.

          * khtml/rendering/render_text.cpp: (RenderText::calcMinMaxWidth): Call the new isBreakable, and
          also do some things to streamline and perhaps speed up: a) use an index withing the string rather
          than within the word when finding a word break, b) use a cached copy of the string base pointer
          and the string length rather than repeatedly dereferencing str, c) use a cached copy of the current
          character rather than repeatedly fetching it. Darin also changed one confusing use of ? : to just
          use && instead.

  Revision  Changes    Path
  1.84      +25 -0     WebCore/ChangeLog

  Index: ChangeLog
  ===================================================================
  RCS file: /cvs/root/WebCore/ChangeLog,v
  retrieving revision 1.83
  retrieving revision 1.84
  diff -u -r1.83 -r1.84
  --- ChangeLog	3 Sep 2005 23:09:57 -0000	1.83
  +++ ChangeLog	4 Sep 2005 07:42:30 -0000	1.84
  @@ -1,3 +1,28 @@
  +2005-09-04  Mitz Pettel  <opendarwin.org at mitzpettel.com>
  +
  +        Reviewed and landed by Darin.
  +
  +        - fixed <rdar://problem/3698926> so slow it feels like a hang calling UCFindTextBreak() tons of times at forum.presence-pc.com (4789)
  +          also http://bugzilla.opendarwin.org/show_bug.cgi?id=4789
  +
  +        No test cases added because this is a performance fix. Existing test cases continue to work.
  +
  +        * khtml/rendering/break_lines.h: Declare the new nextBreakablePosition and also define a new
  +        isBreakable function that adds an in/out "next breakable position" parameter.
  +        * khtml/rendering/break_lines.cpp: (khtml::nextBreakablePosition): Replaced the old isBreakable
  +        with this function.
  +
  +        * khtml/rendering/bidi.cpp: (khtml::RenderBlock::findNextLineBreak): Call the new version of
  +        isBreakable that uses the previously-found "next breakable" position until we pass it rather
  +        than analyzing each position separately to see if we can break there.
  +
  +        * khtml/rendering/render_text.cpp: (RenderText::calcMinMaxWidth): Call the new isBreakable, and
  +        also do some things to streamline and perhaps speed up: a) use an index withing the string rather
  +        than within the word when finding a word break, b) use a cached copy of the string base pointer
  +        and the string length rather than repeatedly dereferencing str, c) use a cached copy of the current
  +        character rather than repeatedly fetching it. Darin also changed one confusing use of ? : to just
  +        use && instead.
  +
   2005-09-03  David Hyatt  <hyatt at apple.com>

   	This patch substantially reworks how mouse clicking and double clicking work in the DOM.

  1.141     +4 -2      WebCore/khtml/rendering/bidi.cpp

  Index: bidi.cpp
  ===================================================================
  RCS file: /cvs/root/WebCore/khtml/rendering/bidi.cpp,v
  retrieving revision 1.140
  retrieving revision 1.141
  diff -u -r1.140 -r1.141
  --- bidi.cpp	15 Aug 2005 23:31:16 -0000	1.140
  +++ bidi.cpp	4 Sep 2005 07:42:31 -0000	1.141
  @@ -2074,13 +2074,14 @@
               bool appliedEndWidth = false;

               int wrapW = tmpW;
  +            int nextBreakable = -1;

               while(len) {
                   bool previousCharacterIsSpace = currentCharacterIsSpace;
                   bool previousCharacterIsWS = currentCharacterIsWS;
                   const QChar c = str[pos];
                   currentCharacterIsSpace = c == ' ' || c == '\t' || (!isPre && (c == '\n'));
  -                
  +
                   if (isPre || !currentCharacterIsSpace)
                       isLineEmpty = false;

  @@ -2118,7 +2119,8 @@

                   if (breakWords)
                       wrapW += t->width(pos, 1, f, w+wrapW);
  -                if (c == '\n' || (!isPre && isBreakable(str, pos, strlen, breakNBSP)) || (breakWords && (w + wrapW > width))) {
  +
  +                if (c == '\n' || (!isPre && isBreakable(str, pos, strlen, nextBreakable, breakNBSP)) || (breakWords && (w + wrapW > width))) {
                       if (ignoringSpaces) {
                           if (!currentCharacterIsSpace) {
                               // Stop ignoring spaces and begin at this

  1.24      +54 -110   WebCore/khtml/rendering/break_lines.cpp

  Index: break_lines.cpp
  ===================================================================
  RCS file: /cvs/root/WebCore/khtml/rendering/break_lines.cpp,v
  retrieving revision 1.23
  retrieving revision 1.24
  diff -u -r1.23 -r1.24
  --- break_lines.cpp	27 Aug 2005 18:55:00 -0000	1.23
  +++ break_lines.cpp	4 Sep 2005 07:42:31 -0000	1.24
  @@ -1,120 +1,64 @@
  -#include <break_lines.h>
  -
  -#ifdef HAVE_THAI_BREAKS
  -#include "ThBreakIterator.h"
  +/*
  + * This file is part of the DOM implementation for KDE.
  + *
  + * Copyright (C) 2005 Apple Computer, Inc.
  + *
  + * This library is free software; you can redistribute it and/or
  + * modify it under the terms of the GNU Library General Public
  + * License as published by the Free Software Foundation; either
  + * version 2 of the License, or (at your option) any later version.
  + *
  + * This library is distributed in the hope that it will be useful,
  + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  + * Library General Public License for more details.
  + *
  + * You should have received a copy of the GNU Library General Public License
  + * along with this library; see the file COPYING.LIB.  If not, write to
  + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  + * Boston, MA 02111-1307, USA.
  + *
  + */

  -static const QChar *cached = 0;
  -static QCString *cachedString = 0;
  -static ThBreakIterator *thaiIt = 0;
  -#endif
  +#include "break_lines.h"

  -#if APPLE_CHANGES
   #include <CoreServices/CoreServices.h>
  -#endif
  -
  -void cleanupLineBreaker()
  -{
  -#ifdef HAVE_THAI_BREAKS
  -    if ( cachedString )
  -	delete cachedString;
  -    if ( thaiIt )
  -	delete thaiIt;
  -    cached = 0;
  -#endif
  -}

   namespace khtml {
  -/*
  -  This function returns true, if the string can bre broken before the 
  -  character at position pos in the string s with length len
  -*/
  -bool isBreakable(const QChar *s, int pos, int len, bool breakNBSP)
  +
  +int nextBreakablePosition(const QChar *str, int pos, int len, bool breakNBSP)
   {
  -#if !APPLE_CHANGES
  -    const QChar *c = s+pos;
  -    unsigned short ch = c->unicode();
  -    if ( ch > 0xff ) {
  -	// not latin1, need to do more sophisticated checks for asian fonts
  -	unsigned char row = c->row();
  -	if ( row == 0x0e ) {
  -	    // 0e00 - 0e7f == Thai
  -	    if ( c->cell() < 0x80 ) {
  -#ifdef HAVE_THAI_BREAKS
  -		// check for thai
  -		if( s != cached ) {
  -		    // build up string of thai chars
  -		    QTextCodec *thaiCodec = QTextCodec::codecForMib(2259);
  -		    if ( !cachedString )
  -			cachedString = new QCString;
  -		    if ( !thaiIt )
  -			thaiIt = ThBreakIterator::createWordInstance(); 
  -		    *cachedString = thaiCodec->fromUnicode( QConstString( (QChar *)s, len ).qstring() );
  -		}
  -		thaiIt->setText((tchar *)cachedString->data());
  -		for(int i = thaiIt->first(); i != thaiIt->DONE; i = thaiIt->next() ) {
  -		    if( i == pos )
  -			return true;
  -		    if( i > pos )
  -			return false;
  -		}
  -		return false;
  -#else
  -		// if we don't have a thai line breaking lib, allow
  -		// breaks everywhere except directly before punctuation.
  -		return true;
  -#endif
  -	    } else 
  -		return false;
  -	}
  -	if ( row > 0x2d && row < 0xfb || row == 0x11 )
  -	    // asian line breaking. Everywhere allowed except directly
  -	    // in front of a punctuation character.
  -	    return true;
  -	else // no asian font
  -	    return c->isSpace();
  -    } else {
  -	if ( ch == ' ' || ch == '\t' || ch == '\n' )
  -	    return true;
  -    }
  -    return false;
  -#else
  -    OSStatus status = 0, findStatus = 0;
  +    OSStatus status = 0, findStatus = -1;
       static TextBreakLocatorRef breakLocator = 0;
  -    UniCharArrayOffset end;
  -    const QChar *c = s+pos;
  -    unsigned short ch = c->unicode();
  -
  -    // Newlines and spaces are always breakable, as are non-breaking spaces when breakNBSP is true.
  -    // FIXME: Tiger is not allowing breaks on spaces after bullet characters like &#183;.  We don't know
  -    // at the moment whether this behavior is correct or not.  Since Tiger is also not allowing breaks on spaces
  -    // after hyphen-like characters, this does not seem ideal for the Web.  Therefore for now we override space
  -    // characters up front and bypass the Unicode line breaking routines.
  -    if (ch == ' ' || ch == '\n' || ch == '\t' || (breakNBSP && ch == 0xa0))
  -        return true;
  -
  -    // If current character, or the previous character aren't simple latin1 then
  -    // use the UC line break locator.  UCFindTextBreak will report false if we
  -    // have a sequence of 0xa0 0x20 (nbsp, sp), so we explicity check for that
  -    // case.
  -    unsigned short lastCh = pos > 0 ? (s+pos-1)->unicode() : 0;
  -    if ((ch > 0x7f && ch != 0xa0) || (lastCh > 0x7f && lastCh != 0xa0)) {
  -        if (!breakLocator)
  -            status = UCCreateTextBreakLocator(NULL, 0, kUCTextBreakLineMask, &breakLocator);
  -        if (status == 0)
  -            findStatus = UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, (const UniChar *)s, len, pos, &end);
  -        if (findStatus == 0)
  -            return (int)end == pos && !(lastCh == ' ' || lastCh == '\n' || lastCh == '\t' || (breakNBSP && lastCh == 0xa0));
  -
  -        // If Carbon fails, fail back on simple white space detection.
  -    }
  +    int nextUCBreak = -1;
  +    int i;
  +    unsigned short ch, lastCh;

  -    // Match WinIE's breaking strategy, which is to always allow breaks after hyphens and question marks.
  -    if (lastCh == '-' || lastCh == '?')
  -        return true;
  -
  -    // No other ascii chars are breakable.
  -    return false;
  -#endif    
  +    lastCh = pos > 0 ? str[pos - 1].unicode() : 0;
  +    for (i = pos; i < len; i++) {
  +        ch = str[i].unicode();
  +        if (ch == ' ' || ch == '\n' || ch == '\t' || (breakNBSP && ch == 0xa0))
  +            break;
  +        // Match WinIE's breaking strategy, which is to always allow breaks after hyphens and question marks.
  +        if (lastCh == '-' || lastCh == '?')
  +            break;
  +        // If current character, or the previous character aren't simple latin1 then
  +        // use the UC line break locator.  UCFindTextBreak will report false if we
  +        // have a sequence of 0xa0 0x20 (nbsp, sp), so we explicity check for that
  +        // case.
  +        if ((ch > 0x7f && ch != 0xa0) || (lastCh > 0x7f && lastCh != 0xa0)) {
  +            if (nextUCBreak < i) {
  +                if (!breakLocator)
  +                    status = UCCreateTextBreakLocator(NULL, 0, kUCTextBreakLineMask, &breakLocator);
  +                if (status == 0)
  +                    findStatus = UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, (const UniChar *)str, len, i, (UniCharArrayOffset *)&nextUCBreak);
  +            }
  +            if (findStatus == 0 && i == nextUCBreak && !(lastCh == ' ' || lastCh == '\n' || lastCh == '\t' || (breakNBSP && lastCh == 0xa0)))
  +                break;
  +        }
  +        lastCh = ch;
  +    }
  +    return i;
   }

  -};
  +};
  \ No newline at end of file

  1.3       +34 -3     WebCore/khtml/rendering/break_lines.h

  Index: break_lines.h
  ===================================================================
  RCS file: /cvs/root/WebCore/khtml/rendering/break_lines.h,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- break_lines.h	5 Oct 2004 20:05:38 -0000	1.2
  +++ break_lines.h	4 Sep 2005 07:42:31 -0000	1.3
  @@ -1,5 +1,36 @@
  -#include <qstring.h>
  +/*
  + * This file is part of the DOM implementation for KDE.
  + *
  + * Copyright (C) 2005 Apple Computer, Inc.
  + *
  + * This library is free software; you can redistribute it and/or
  + * modify it under the terms of the GNU Library General Public
  + * License as published by the Free Software Foundation; either
  + * version 2 of the License, or (at your option) any later version.
  + *
  + * This library is distributed in the hope that it will be useful,
  + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  + * Library General Public License for more details.
  + *
  + * You should have received a copy of the GNU Library General Public License
  + * along with this library; see the file COPYING.LIB.  If not, write to
  + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  + * Boston, MA 02111-1307, USA.
  + *
  + */
  +
  +class QChar;

   namespace khtml {
  -    bool isBreakable(const QChar *str, int pos, int len, bool breakNBSP = false);
  -};
  +
  +    int nextBreakablePosition(const QChar *str, int pos, int len, bool breakNBSP = false);
  +
  +    inline bool isBreakable(const QChar *str, int pos, int len, int &nextBreakable, bool breakNBSP = false)
  +    {
  +        if (pos > nextBreakable)
  +            nextBreakable = nextBreakablePosition(str, pos, len, breakNBSP);
  +        return pos == nextBreakable;
  +    }
  +
  +}

  1.196     +19 -13    WebCore/khtml/rendering/render_text.cpp

  Index: render_text.cpp
  ===================================================================
  RCS file: /cvs/root/WebCore/khtml/rendering/render_text.cpp,v
  retrieving revision 1.195
  retrieving revision 1.196
  diff -u -r1.195 -r1.196
  --- render_text.cpp	1 Sep 2005 18:03:33 -0000	1.195
  +++ render_text.cpp	4 Sep 2005 07:42:31 -0000	1.196
  @@ -1308,15 +1308,17 @@
       const Font *f = htmlFont( false );
       int wordSpacing = style()->wordSpacing();
       int len = str->l;
  +    QChar *txt = str->s;
       bool needsWordSpacing = false;
       bool ignoringSpaces = false;
       bool isSpace = false;
       bool isPre = style()->whiteSpace() == PRE;
       bool firstWord = true;
       bool firstLine = true;
  +    int nextBreakable = -1;
       for(int i = 0; i < len; i++)
       {
  -        const QChar c = str->s[i];
  +        QChar c = txt[i];

           bool previousCharacterIsSpace = isSpace;

  @@ -1354,12 +1356,18 @@
               continue;
           }

  -        int wordlen = 0;
  -        while (i+wordlen < len && str->s[i+wordlen] != '\n' && str->s[i+wordlen] != ' ' && str->s[i+wordlen] != '\t' &&
  -               (i+wordlen == 0 || str->s[i+wordlen].unicode() != SOFT_HYPHEN) && // Skip soft hyphens
  -               (wordlen == 0 || !isBreakable( str->s, i+wordlen, str->l)))
  -            wordlen++;
  +        bool hasBreak = isBreakable(txt, i, len, nextBreakable);
  +        int j = i;
  +        while (c != '\n' && c != ' ' && c != '\t' && (j == 0 || c.unicode() != SOFT_HYPHEN)) {
  +            j++;
  +            if (j == len)
  +                break;
  +            c = txt[j];
  +            if (isBreakable(txt, j, len, nextBreakable))
  +                break;
  +        }

  +        int wordlen = j - i;
           if (wordlen)
           {
   #if !APPLE_CHANGES
  @@ -1370,15 +1378,14 @@
               currMinWidth += w;
               currMaxWidth += w;

  -            bool isBreakableCharSpace = (i+wordlen < len) ? ((!isPre && (str->s[i+wordlen] == '\n' || str->s[i+wordlen] == '\t')) || 
  -                                                             str->s[i+wordlen] == ' ') : false;
  +            bool isBreakableCharSpace = (j < len) && ((!isPre && (c == '\n' || c == '\t')) || c == ' ');

  -            if (i+wordlen < len && style()->whiteSpace() == NORMAL)
  +            if (j < len && style()->whiteSpace() == NORMAL)
                   m_hasBreakableChar = true;

               // Add in wordSpacing to our currMaxWidth, but not if this is the last word on a line or the
               // last word in the run.
  -            if (wordSpacing && isBreakableCharSpace && !containsOnlyWhitespace(i+wordlen, len-(i+wordlen)))
  +            if (wordSpacing && isBreakableCharSpace && !containsOnlyWhitespace(j, len-j))
                   currMaxWidth += wordSpacing;

               if (firstWord) {
  @@ -1386,7 +1393,6 @@
                   // If the first character in the run is breakable, then we consider ourselves to have a beginning
                   // minimum width of 0, since a break could occur right before our run starts, preventing us from ever
                   // being appended to a previous text run when considering the total minimum width of the containing block.
  -                bool hasBreak = isBreakable(str->s, i, str->l);
                   if (hasBreak)
                       m_hasBreakableChar = true;
                   m_beginMinWidth = hasBreak ? 0 : w;
  @@ -1420,8 +1426,8 @@
               }
               else
               {
  -                currMaxWidth += f->width(str->s, str->l, i + wordlen, 1, tabWidth(), leadWidth + currMaxWidth);
  -                needsWordSpacing = isSpace && !previousCharacterIsSpace && i + wordlen == len-1;
  +                currMaxWidth += f->width(txt, len, i, 1, tabWidth(), leadWidth + currMaxWidth);
  +                needsWordSpacing = isSpace && !previousCharacterIsSpace && i == len-1;
               }
           }
       }