| 1 | package Locale::Maketext::Guts; |
|---|
| 2 | |
|---|
| 3 | $VERSION = '1.13'; |
|---|
| 4 | |
|---|
| 5 | BEGIN { |
|---|
| 6 | # Just so we're nice and define SOMETHING in "our" package. |
|---|
| 7 | *zorp = sub { return scalar @_ } unless defined &zorp; |
|---|
| 8 | } |
|---|
| 9 | |
|---|
| 10 | package Locale::Maketext; |
|---|
| 11 | use strict; |
|---|
| 12 | use vars qw($USE_LITERALS $GUTSPATH); |
|---|
| 13 | |
|---|
| 14 | BEGIN { |
|---|
| 15 | $GUTSPATH = __FILE__; |
|---|
| 16 | *DEBUG = sub () {0} unless defined &DEBUG; |
|---|
| 17 | } |
|---|
| 18 | |
|---|
| 19 | use utf8; |
|---|
| 20 | |
|---|
| 21 | sub _compile { |
|---|
| 22 | # This big scary routine compiles an entry. |
|---|
| 23 | # It returns either a coderef if there's brackety bits in this, or |
|---|
| 24 | # otherwise a ref to a scalar. |
|---|
| 25 | |
|---|
| 26 | my $target = ref($_[0]) || $_[0]; |
|---|
| 27 | |
|---|
| 28 | my(@code); |
|---|
| 29 | my(@c) = (''); # "chunks" -- scratch. |
|---|
| 30 | my $call_count = 0; |
|---|
| 31 | my $big_pile = ''; |
|---|
| 32 | { |
|---|
| 33 | my $in_group = 0; # start out outside a group |
|---|
| 34 | my($m, @params); # scratch |
|---|
| 35 | |
|---|
| 36 | while($_[1] =~ # Iterate over chunks. |
|---|
| 37 | m/\G( |
|---|
| 38 | [^\~\[\]]+ # non-~[] stuff |
|---|
| 39 | | |
|---|
| 40 | ~. # ~[, ~], ~~, ~other |
|---|
| 41 | | |
|---|
| 42 | \[ # [ presumably opening a group |
|---|
| 43 | | |
|---|
| 44 | \] # ] presumably closing a group |
|---|
| 45 | | |
|---|
| 46 | ~ # terminal ~ ? |
|---|
| 47 | | |
|---|
| 48 | $ |
|---|
| 49 | )/xgs |
|---|
| 50 | ) { |
|---|
| 51 | DEBUG>2 and print qq{ "$1"\n}; |
|---|
| 52 | |
|---|
| 53 | if($1 eq '[' or $1 eq '') { # "[" or end |
|---|
| 54 | # Whether this is "[" or end, force processing of any |
|---|
| 55 | # preceding literal. |
|---|
| 56 | if($in_group) { |
|---|
| 57 | if($1 eq '') { |
|---|
| 58 | $target->_die_pointing($_[1], 'Unterminated bracket group'); |
|---|
| 59 | } |
|---|
| 60 | else { |
|---|
| 61 | $target->_die_pointing($_[1], 'You can\'t nest bracket groups'); |
|---|
| 62 | } |
|---|
| 63 | } |
|---|
| 64 | else { |
|---|
| 65 | if ($1 eq '') { |
|---|
| 66 | DEBUG>2 and print " [end-string]\n"; |
|---|
| 67 | } |
|---|
| 68 | else { |
|---|
| 69 | $in_group = 1; |
|---|
| 70 | } |
|---|
| 71 | die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity |
|---|
| 72 | if(length $c[-1]) { |
|---|
| 73 | # Now actually processing the preceding literal |
|---|
| 74 | $big_pile .= $c[-1]; |
|---|
| 75 | if($USE_LITERALS and ( |
|---|
| 76 | (ord('A') == 65) |
|---|
| 77 | ? $c[-1] !~ m/[^\x20-\x7E]/s |
|---|
| 78 | # ASCII very safe chars |
|---|
| 79 | : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s |
|---|
| 80 | # EBCDIC very safe chars |
|---|
| 81 | )) { |
|---|
| 82 | # normal case -- all very safe chars |
|---|
| 83 | $c[-1] =~ s/'/\\'/g; |
|---|
| 84 | push @code, q{ '} . $c[-1] . "',\n"; |
|---|
| 85 | $c[-1] = ''; # reuse this slot |
|---|
| 86 | } |
|---|
| 87 | else { |
|---|
| 88 | push @code, ' $c[' . $#c . "],\n"; |
|---|
| 89 | push @c, ''; # new chunk |
|---|
| 90 | } |
|---|
| 91 | } |
|---|
| 92 | # else just ignore the empty string. |
|---|
| 93 | } |
|---|
| 94 | |
|---|
| 95 | } |
|---|
| 96 | elsif($1 eq ']') { # "]" |
|---|
| 97 | # close group -- go back in-band |
|---|
| 98 | if($in_group) { |
|---|
| 99 | $in_group = 0; |
|---|
| 100 | |
|---|
| 101 | DEBUG>2 and print " --Closing group [$c[-1]]\n"; |
|---|
| 102 | |
|---|
| 103 | # And now process the group... |
|---|
| 104 | |
|---|
| 105 | if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) { |
|---|
| 106 | DEBUG > 2 and print " -- (Ignoring)\n"; |
|---|
| 107 | $c[-1] = ''; # reset out chink |
|---|
| 108 | next; |
|---|
| 109 | } |
|---|
| 110 | |
|---|
| 111 | #$c[-1] =~ s/^\s+//s; |
|---|
| 112 | #$c[-1] =~ s/\s+$//s; |
|---|
| 113 | ($m,@params) = split(/,/, $c[-1], -1); # was /\s*,\s*/ |
|---|
| 114 | |
|---|
| 115 | # A bit of a hack -- we've turned "~,"'s into DELs, so turn |
|---|
| 116 | # 'em into real commas here. |
|---|
| 117 | if (ord('A') == 65) { # ASCII, etc |
|---|
| 118 | foreach($m, @params) { tr/\x7F/,/ } |
|---|
| 119 | } |
|---|
| 120 | else { # EBCDIC (1047, 0037, POSIX-BC) |
|---|
| 121 | # Thanks to Peter Prymmer for the EBCDIC handling |
|---|
| 122 | foreach($m, @params) { tr/\x07/,/ } |
|---|
| 123 | } |
|---|
| 124 | |
|---|
| 125 | # Special-case handling of some method names: |
|---|
| 126 | if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) { |
|---|
| 127 | # Treat [_1,...] as [,_1,...], etc. |
|---|
| 128 | unshift @params, $m; |
|---|
| 129 | $m = ''; |
|---|
| 130 | } |
|---|
| 131 | elsif($m eq '*') { |
|---|
| 132 | $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars" |
|---|
| 133 | } |
|---|
| 134 | elsif($m eq '#') { |
|---|
| 135 | $m = 'numf'; # "#" for "number": [#,_1] for "the number _1" |
|---|
| 136 | } |
|---|
| 137 | |
|---|
| 138 | # Most common case: a simple, legal-looking method name |
|---|
| 139 | if($m eq '') { |
|---|
| 140 | # 0-length method name means to just interpolate: |
|---|
| 141 | push @code, ' ('; |
|---|
| 142 | } |
|---|
| 143 | elsif($m =~ /^\w+(?:\:\:\w+)*$/s |
|---|
| 144 | and $m !~ m/(?:^|\:)\d/s |
|---|
| 145 | # exclude starting a (sub)package or symbol with a digit |
|---|
| 146 | ) { |
|---|
| 147 | # Yes, it even supports the demented (and undocumented?) |
|---|
| 148 | # $obj->Foo::bar(...) syntax. |
|---|
| 149 | $target->_die_pointing( |
|---|
| 150 | $_[1], q{Can't use "SUPER::" in a bracket-group method}, |
|---|
| 151 | 2 + length($c[-1]) |
|---|
| 152 | ) |
|---|
| 153 | if $m =~ m/^SUPER::/s; |
|---|
| 154 | # Because for SUPER:: to work, we'd have to compile this into |
|---|
| 155 | # the right package, and that seems just not worth the bother, |
|---|
| 156 | # unless someone convinces me otherwise. |
|---|
| 157 | |
|---|
| 158 | push @code, ' $_[0]->' . $m . '('; |
|---|
| 159 | } |
|---|
| 160 | else { |
|---|
| 161 | # TODO: implement something? or just too icky to consider? |
|---|
| 162 | $target->_die_pointing( |
|---|
| 163 | $_[1], |
|---|
| 164 | "Can't use \"$m\" as a method name in bracket group", |
|---|
| 165 | 2 + length($c[-1]) |
|---|
| 166 | ); |
|---|
| 167 | } |
|---|
| 168 | |
|---|
| 169 | pop @c; # we don't need that chunk anymore |
|---|
| 170 | ++$call_count; |
|---|
| 171 | |
|---|
| 172 | foreach my $p (@params) { |
|---|
| 173 | if($p eq '_*') { |
|---|
| 174 | # Meaning: all parameters except $_[0] |
|---|
| 175 | $code[-1] .= ' @_[1 .. $#_], '; |
|---|
| 176 | # and yes, that does the right thing for all @_ < 3 |
|---|
| 177 | } |
|---|
| 178 | elsif($p =~ m/^_(-?\d+)$/s) { |
|---|
| 179 | # _3 meaning $_[3] |
|---|
| 180 | $code[-1] .= '$_[' . (0 + $1) . '], '; |
|---|
| 181 | } |
|---|
| 182 | elsif($USE_LITERALS and ( |
|---|
| 183 | (ord('A') == 65) |
|---|
| 184 | ? $p !~ m/[^\x20-\x7E]/s |
|---|
| 185 | # ASCII very safe chars |
|---|
| 186 | : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s |
|---|
| 187 | # EBCDIC very safe chars |
|---|
| 188 | )) { |
|---|
| 189 | # Normal case: a literal containing only safe characters |
|---|
| 190 | $p =~ s/'/\\'/g; |
|---|
| 191 | $code[-1] .= q{'} . $p . q{', }; |
|---|
| 192 | } |
|---|
| 193 | else { |
|---|
| 194 | # Stow it on the chunk-stack, and just refer to that. |
|---|
| 195 | push @c, $p; |
|---|
| 196 | push @code, ' $c[' . $#c . '], '; |
|---|
| 197 | } |
|---|
| 198 | } |
|---|
| 199 | $code[-1] .= "),\n"; |
|---|
| 200 | |
|---|
| 201 | push @c, ''; |
|---|
| 202 | } |
|---|
| 203 | else { |
|---|
| 204 | $target->_die_pointing($_[1], q{Unbalanced ']'}); |
|---|
| 205 | } |
|---|
| 206 | |
|---|
| 207 | } |
|---|
| 208 | elsif(substr($1,0,1) ne '~') { |
|---|
| 209 | # it's stuff not containing "~" or "[" or "]" |
|---|
| 210 | # i.e., a literal blob |
|---|
| 211 | $c[-1] .= $1; |
|---|
| 212 | |
|---|
| 213 | } |
|---|
| 214 | elsif($1 eq '~~') { # "~~" |
|---|
| 215 | $c[-1] .= '~'; |
|---|
| 216 | |
|---|
| 217 | } |
|---|
| 218 | elsif($1 eq '~[') { # "~[" |
|---|
| 219 | $c[-1] .= '['; |
|---|
| 220 | |
|---|
| 221 | } |
|---|
| 222 | elsif($1 eq '~]') { # "~]" |
|---|
| 223 | $c[-1] .= ']'; |
|---|
| 224 | |
|---|
| 225 | } |
|---|
| 226 | elsif($1 eq '~,') { # "~," |
|---|
| 227 | if($in_group) { |
|---|
| 228 | # This is a hack, based on the assumption that no-one will actually |
|---|
| 229 | # want a DEL inside a bracket group. Let's hope that's it's true. |
|---|
| 230 | if (ord('A') == 65) { # ASCII etc |
|---|
| 231 | $c[-1] .= "\x7F"; |
|---|
| 232 | } |
|---|
| 233 | else { # EBCDIC (cp 1047, 0037, POSIX-BC) |
|---|
| 234 | $c[-1] .= "\x07"; |
|---|
| 235 | } |
|---|
| 236 | } |
|---|
| 237 | else { |
|---|
| 238 | $c[-1] .= '~,'; |
|---|
| 239 | } |
|---|
| 240 | |
|---|
| 241 | } |
|---|
| 242 | elsif($1 eq '~') { # possible only at string-end, it seems. |
|---|
| 243 | $c[-1] .= '~'; |
|---|
| 244 | |
|---|
| 245 | } |
|---|
| 246 | else { |
|---|
| 247 | # It's a "~X" where X is not a special character. |
|---|
| 248 | # Consider it a literal ~ and X. |
|---|
| 249 | $c[-1] .= $1; |
|---|
| 250 | } |
|---|
| 251 | } |
|---|
| 252 | } |
|---|
| 253 | |
|---|
| 254 | if($call_count) { |
|---|
| 255 | undef $big_pile; # Well, nevermind that. |
|---|
| 256 | } |
|---|
| 257 | else { |
|---|
| 258 | # It's all literals! Ahwell, that can happen. |
|---|
| 259 | # So don't bother with the eval. Return a SCALAR reference. |
|---|
| 260 | return \$big_pile; |
|---|
| 261 | } |
|---|
| 262 | |
|---|
| 263 | die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity |
|---|
| 264 | DEBUG and warn scalar(@c), " chunks under closure\n"; |
|---|
| 265 | if(@code == 0) { # not possible? |
|---|
| 266 | DEBUG and warn "Empty code\n"; |
|---|
| 267 | return \''; |
|---|
| 268 | } |
|---|
| 269 | elsif(@code > 1) { # most cases, presumably! |
|---|
| 270 | unshift @code, "join '',\n"; |
|---|
| 271 | } |
|---|
| 272 | unshift @code, "use strict; sub {\n"; |
|---|
| 273 | push @code, "}\n"; |
|---|
| 274 | |
|---|
| 275 | DEBUG and warn @code; |
|---|
| 276 | my $sub = eval(join '', @code); |
|---|
| 277 | die "$@ while evalling" . join('', @code) if $@; # Should be impossible. |
|---|
| 278 | return $sub; |
|---|
| 279 | } |
|---|
| 280 | |
|---|
| 281 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|---|
| 282 | |
|---|
| 283 | sub _die_pointing { |
|---|
| 284 | # This is used by _compile to throw a fatal error |
|---|
| 285 | my $target = shift; # class name |
|---|
| 286 | # ...leaving $_[0] the error-causing text, and $_[1] the error message |
|---|
| 287 | |
|---|
| 288 | my $i = index($_[0], "\n"); |
|---|
| 289 | |
|---|
| 290 | my $pointy; |
|---|
| 291 | my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1; |
|---|
| 292 | if($pos < 1) { |
|---|
| 293 | $pointy = "^=== near there\n"; |
|---|
| 294 | } |
|---|
| 295 | else { # we need to space over |
|---|
| 296 | my $first_tab = index($_[0], "\t"); |
|---|
| 297 | if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) { |
|---|
| 298 | # No tabs, or the first tab is harmlessly after where we will point to, |
|---|
| 299 | # AND we're far enough from the margin that we can draw a proper arrow. |
|---|
| 300 | $pointy = ('=' x $pos) . "^ near there\n"; |
|---|
| 301 | } |
|---|
| 302 | else { |
|---|
| 303 | # tabs screw everything up! |
|---|
| 304 | $pointy = substr($_[0],0,$pos); |
|---|
| 305 | $pointy =~ tr/\t //cd; |
|---|
| 306 | # make everything into whitespace, but preseving tabs |
|---|
| 307 | $pointy .= "^=== near there\n"; |
|---|
| 308 | } |
|---|
| 309 | } |
|---|
| 310 | |
|---|
| 311 | my $errmsg = "$_[1], in\:\n$_[0]"; |
|---|
| 312 | |
|---|
| 313 | if($i == -1) { |
|---|
| 314 | # No newline. |
|---|
| 315 | $errmsg .= "\n" . $pointy; |
|---|
| 316 | } |
|---|
| 317 | elsif($i == (length($_[0]) - 1) ) { |
|---|
| 318 | # Already has a newline at end. |
|---|
| 319 | $errmsg .= $pointy; |
|---|
| 320 | } |
|---|
| 321 | else { |
|---|
| 322 | # don't bother with the pointy bit, I guess. |
|---|
| 323 | } |
|---|
| 324 | Carp::croak( "$errmsg via $target, as used" ); |
|---|
| 325 | } |
|---|
| 326 | |
|---|
| 327 | 1; |
|---|
| 328 | |
|---|