/*
DOM WordWalker Library
$Id$

Copyright (c) 2006, Six Apart, Ltd.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.

    * Neither the name of "Six Apart" nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * WordWalker steps through the text nodes reachable from a root node and
 * returns one word per getNextWord() call.
 *
 * Instance state:
 *   rootNode   - node the walk starts from (as passed to init).
 *   nodeFilter - stored by init; not consulted by the code in this class.
 *   range      - SelectionRange covering the most recently returned word,
 *                or undefined before the first call / after a reset.
 *   word       - text of the most recently returned word, or undefined.
 *   length     - number of words returned since the last reset.
 */
WordWalker = new Class( Object, {
    /*
     * A word starts on an ASCII letter or digit, or on a character in
     * \u00C0-\u00FF or \u0101-\u017F.
     * NOTE(review): the second range begins at \u0101, so U+0100 is not
     * treated as a word-start character -- confirm whether that is intended.
     */
    WORD_START: /[a-zA-Z0-9\u00C0-\u00FF\u0101-\u017F]/,

    /*-
     * Note: To support French and Italian, either the server or client must break between apostrophes and vowels (i.e., "l'auberge").
     *       Currently, this client part does not do that.
     * Note: There is no support for lower-case (see the "Break quoted" rule below) in Latin-Extended-A and Latin-Extended-B.
     * Note: ** KEEP "Word Boundary Rules" DOCUMENTATION UP-TO-DATE **
     */
    // WORD_END is an alternation of four rules; a word ends at the first position where any of them matches:
    //   1. Break at '.':  a '.' followed either by one character that is not a letter, digit, apostrophe,
    //                     \u2018 or comma and then one character that is not in a-z or \u00DF-\u00FF,
    //                     or by nothing but optional whitespace up to the end of the text.
    //   2. Break quoted:  a quote character (' " \u2018 \u2019 \u201c \u201d) followed by whitespace, ')', ']' or ','.
    //   3. Break-if-not-x-and-not-followed-by:
    //                     any character other than a letter, digit, \u2018, '-', '.' or apostrophe,
    //                     unless it is immediately followed by '.' and a digit.
    //   4. '(s)':         an 's' directly followed by ')'. Skips '(s)' and other optional pluralities
    //                     (see if this can be improved for German and Italian; i.e., their equivalents of "documents(s)").
    WORD_END: /\.(?=[^a-zA-Z\u00C0-\u00FF\u0101-\u017F0-9'\u2018,][^a-z\u00DF-\u00FF]|[\s]*$)|['"\u2018\u2019\u201c\u201d](?=\s|\)|\]|,)|[^a-zA-Z0-9\u00C0-\u00FF\u0101-\u017F\u2018\-\.'](?!\.\d)|s(?=\))/,


    /*
     * Constructor body: remembers the root node and node filter, then
     * clears all walk state.
     */
    init: function( rootNode, nodeFilter ) {
        this.rootNode = rootNode;
        this.nodeFilter = nodeFilter;
        this.reset();
    },


    /*
     * Clears the current range and word and zeroes the word counter.
     * Always returns undefined so callers can write "return this.reset();".
     */
    reset: function() {
        this.range = undefined;
        this.word = undefined;
        this.length = 0;
        return undefined;
    },


    /*
     * Advances to the next word and returns its text, or returns undefined
     * (after resetting the walker) when no further word can be found.
     *
     * On success this.range spans the word, this.word holds its text and
     * this.length has been incremented. A word may continue across
     * consecutive nodes for as long as DOM.isInlineNode() accepts the next
     * node.
     */
    getNextWord: function() {
        var proxy, text, rest, offset, done;

        if( defined( this.range ) ) {
            /* continue right after the previously returned word */
            this.range.setStart( this.range.endContainer, this.range.endOffset );
            this.range.collapse( true );
        } else {
            /* first call (or first call after a reset): start at the first text node */
            this.reset();
            var firstNode = DOM.Proxy.getNextTextNode( this.rootNode );
            if( !firstNode )
                return undefined;
            this.range = new SelectionRange( firstNode, 0 );
            this.range.collapse( true );
        }

        for( ;; ) {
            this.word = undefined;

            /* find word start */
            proxy = new DOM.Proxy( this.range.startContainer );
            while( defined( proxy.node ) && !defined( this.word ) ) {
                if( proxy.node.nodeType === Node.TEXT_NODE ) {
                    if( proxy.node !== this.range.startContainer )
                        this.range.setStart( proxy.node, 0 );

                    text = proxy.node.nodeValue;
                    rest = text.substring( this.range.startOffset, text.length );

                    offset = rest.search( this.WORD_START );
                    if( offset >= 0 )
                        this.word = "";
                    else
                        offset = rest.length; /* nothing here: consume the whole node */
                    this.range.startOffset += offset;
                    this.range.collapse( true );
                }

                if( !defined( this.word ) )
                    proxy.getNextNode();
                if( proxy.node === this.rootNode )
                    break;
            }

            /* end of document: no word-start character left */
            if( !defined( this.word ) )
                return this.reset();

            /* find word end */
            done = false;
            while( !done && defined( proxy.node ) ) {
                if( proxy.node.nodeType === Node.TEXT_NODE ) {
                    if( proxy.node !== this.range.startContainer )
                        this.range.setEnd( proxy.node, 0 );

                    text = proxy.node.nodeValue;
                    rest = text.substring( this.range.endOffset, text.length );

                    offset = rest.search( this.WORD_END );
                    if( offset >= 0 )
                        done = true;
                    else
                        offset = rest.length; /* word runs to the end of this node */
                    this.range.endOffset += offset;
                    this.word += rest.substr( 0, offset );
                }

                proxy.getNextNode();
                if( !defined( proxy.node ) || proxy.node === this.rootNode || !DOM.isInlineNode( proxy.node ) )
                    break;
            }

            if( this.word.length > 0 )
                break;

            /* empty word without a WORD_END match: treat as end of document */
            if( !done )
                return this.reset();

            /*
             * WORD_END matched exactly at the word start (the "s)" rule is
             * the only alternative that begins on a word-start character).
             * That is a token to skip, not the end of the document: step
             * over the single character and look for the next word, so the
             * text after e.g. "(s)" is still walked.
             */
            this.range.startOffset += 1;
            this.range.collapse( true );
        }

        this.length++;
        return this.word;
    }
} );