root/trunk/common/WordWalker.js

Revision 159, 5.5 kB (checked in by ydnar, 3 years ago)

added line

  • Property svn:keywords set to Id
Line 
/*
DOM WordWalker Library
$Id$

Copyright (c) 2006, Six Apart, Ltd.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.

    * Neither the name of "Six Apart" nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
36
/*
 * WordWalker
 *
 * Steps through the text nodes reachable from a root node and hands back one
 * word per call to getNextWord(). Word boundaries are decided purely by the
 * WORD_START and WORD_END patterns below. A word may continue across several
 * text nodes as long as every node visited in between is reported inline by
 * DOM.isInlineNode().
 *
 * Instance state:
 *   rootNode   - node passed to init(); the walk stops if traversal reaches it.
 *   nodeFilter - stored by init(); not referenced anywhere else in this class.
 *   range      - SelectionRange covering the most recently returned word, or
 *                undefined before the first word / after the walk has ended.
 *   word       - text of the most recently returned word, or undefined.
 *   length     - number of words returned since the last reset().
 *
 * Depends on names defined outside this file: Class, defined, DOM.Proxy,
 * DOM.isInlineNode, SelectionRange and Node.TEXT_NODE.
 */
WordWalker = new Class( Object, {
    /*
     * Matches the first character of a word: an ASCII letter or digit, or a
     * character in U+00C0-U+00FF or U+0101-U+017F.
     */
    WORD_START: /[a-zA-Z0-9\u00C0-\u00FF\u0101-\u017F]/,

    /*-
     * Note: To support French and Italian, either the server or client must break between apostrophes and vowels (i.e., "l'auberge").
     *       Currently, this client part does not do that.
     * Note: There is no support for lower-case (see "Break quoted" below) in Latin-Extended-A and Latin-Extended-B.
     * Note: ** KEEP "Word Boundary Rules" DOCUMENTATION UP-TO-DATE **
     *
     * WORD_END matches the first character that is NOT part of the current
     * word. Its alternatives, in order:
     *   1. Break after '.' char.: a '.' followed either by a character that is
     *      not a letter, digit, apostrophe, U+2018 or comma and then a
     *      character that is not lower-case, or by nothing but whitespace up
     *      to the end of the text.
     *   2. Break quoted: a straight or curly quote character followed by
     *      whitespace, ')', ']' or ','.
     *   3. Break-if-not-x-and-not-followed-by: any character that is not a
     *      letter, digit, U+2018, '-', '.' or apostrophe, unless it is
     *      followed by '.' and a digit.
     *   4. '(s)': an 's' immediately followed by ')'. Skip '(s)' and other
     *      optional pluralities (see if this can be improved for German and
     *      Italian; i.e., their equivalents of "documents(s)").
     *
     * NOTE(review): when a word begins with an 's' that is directly followed
     * by ')' (e.g. the "s" in "(s)"), alternative 4 matches at offset 0, the
     * collected word is empty, and getNextWord() resets and returns undefined
     * exactly as it does at the end of the text. Confirm callers expect that.
     */
    WORD_END: /\.(?=[^a-zA-Z\u00C0-\u00FF\u0101-\u017F0-9'\u2018,][^a-z\u00DF-\u00FF]|[\s]*$)|['"\u2018\u2019\u201c\u201d](?=\s|\)|\]|,)|[^a-zA-Z0-9\u00C0-\u00FF\u0101-\u017F\u2018\-\.'](?!\.\d)|s(?=\))/,


    /*
     * Constructor body.
     *
     * rootNode   - node whose text is to be walked.
     * nodeFilter - kept on the instance; unused by the code in this class.
     */
    init: function( rootNode, nodeFilter ) {
        this.rootNode = rootNode;
        this.nodeFilter = nodeFilter;
        this.reset();
    },


    /*
     * Forget the current position so the next getNextWord() call starts over
     * from the first text node. Always returns undefined, which lets
     * getNextWord() write "return this.reset();".
     */
    reset: function() {
        this.range = undefined;
        this.word = undefined;
        this.length = 0;
        return undefined;
    },


    /*
     * Advance to the next word.
     *
     * Returns the word's text and leaves this.range spanning it, or resets
     * the walker and returns undefined when no further word is found.
     */
    getNextWord: function() {
        var node, proxy, value, sub, offset, done;

        if( defined( this.range ) ) {
            /* continue from where the previous word ended */
            this.range.setStart( this.range.endContainer, this.range.endOffset );
            this.range.collapse( true );
        } else {
            /* first call (or first call after a reset): start at the first text node */
            this.reset();
            node = DOM.Proxy.getNextTextNode( this.rootNode );
            if( !node ) {
                return undefined;
            }
            this.range = new SelectionRange( node, 0 );
            this.range.collapse( true );
        }

        this.word = undefined;

        /* find word start */
        proxy = new DOM.Proxy( this.range.startContainer );
        while( defined( proxy.node ) && !defined( this.word ) ) {
            if( proxy.node.nodeType === Node.TEXT_NODE ) {
                /* moved on to a later text node: scan it from its beginning */
                if( proxy.node !== this.range.startContainer ) {
                    this.range.setStart( proxy.node, 0 );
                }

                value = proxy.node.nodeValue;
                sub = value.substring( this.range.startOffset, value.length );

                offset = sub.search( this.WORD_START );
                if( offset >= 0 ) {
                    /* this.word becoming defined is what ends the loop */
                    this.word = "";
                } else {
                    /* nothing here: skip the remainder of this text node */
                    offset = sub.length;
                }
                this.range.startOffset += offset;
                this.range.collapse( true );
            }

            if( !defined( this.word ) ) {
                proxy.getNextNode();
            }
            if( proxy.node === this.rootNode ) {
                break;
            }
        }

        if( !defined( this.word ) ) {
            return this.reset();
        }

        /* find word end */
        done = false;
        while( !done && defined( proxy.node ) ) {
            if( proxy.node.nodeType === Node.TEXT_NODE ) {
                /* word continues into a later text node: extend the range into it */
                if( proxy.node !== this.range.startContainer ) {
                    this.range.setEnd( proxy.node, 0 );
                }

                value = proxy.node.nodeValue;
                sub = value.substring( this.range.endOffset, value.length );

                offset = sub.search( this.WORD_END );
                if( offset >= 0 ) {
                    done = true;
                } else {
                    /* no boundary in this node: take all of it and keep going */
                    offset = sub.length;
                }
                this.range.endOffset += offset;
                this.word += sub.substr( 0, offset );
            }

            /* a word never continues past the root or through a non-inline node */
            proxy.getNextNode();
            if( !defined( proxy.node ) || proxy.node === this.rootNode || !DOM.isInlineNode( proxy.node ) ) {
                break;
            }
        }

        /* end of document */
        if( this.word.length === 0 ) {
            return this.reset();
        }

        this.length++;
        return this.word;
    }
} );
Note: See TracBrowser for help on using the browser.