Changeset 2594
- Timestamp:
- 06/18/08 02:03:47 (6 months ago)
- Files:
-
- branches/release-40/extlib/I18N/LangTags.pm (modified) (7 diffs)
- branches/release-40/extlib/I18N/LangTags/Detect.pm (added)
- branches/release-40/extlib/I18N/LangTags/List.pm (modified) (47 diffs)
- branches/release-40/extlib/Locale/Maketext.pm (modified) (11 diffs)
- branches/release-40/extlib/Locale/Maketext.pod (modified) (40 diffs)
- branches/release-40/extlib/Locale/Maketext/Guts.pm (added)
- branches/release-40/extlib/Locale/Maketext/GutsLoader.pm (added)
- branches/release-40/extlib/Locale/Maketext/TPJ13.pod (modified) (7 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/release-40/extlib/I18N/LangTags.pm
r1098 r2594 1 1 2 # Time-stamp: "200 2-02-02 20:43:03 MST"2 # Time-stamp: "2004-10-06 23:26:33 ADT" 3 3 # Sean M. Burke <sburke@cpan.org> 4 4 … … 15 15 locale2language_tag alternate_language_tags 16 16 encode_language_tag panic_languages 17 implicate_supers 18 implicate_supers_strictly 17 19 ); 18 20 %EXPORT_TAGS = ('ALL' => \@EXPORT_OK); 19 21 20 $VERSION = "0.27"; 22 $VERSION = "0.35"; 23 24 sub uniq { my %seen; return grep(!($seen{$_}++), @_); } # a util function 25 21 26 22 27 =head1 NAME … … 26 31 =head1 SYNOPSIS 27 32 28 use I18N::LangTags qw(is_language_tag same_language_tag 29 extract_language_tags super_languages 30 similarity_language_tag is_dialect_of 31 locale2language_tag alternate_language_tags 32 encode_language_tag panic_languages 33 ); 34 35 ...or whatever of those functions you want to import. Those are 36 all the exportable functions -- you're free to import only some, 37 or none at all. By default, none are imported. If you say: 33 use I18N::LangTags(); 34 35 ...or specify whichever of those functions you want to import, like so: 36 37 use I18N::LangTags qw(implicate_supers similarity_language_tag); 38 39 All the exportable functions are listed below -- you're free to import 40 only some, or none at all. By default, none are imported. If you 41 say: 38 42 39 43 use I18N::LangTags qw(:ALL) … … 400 404 401 405 $lang =~ tr<_><->; # "en_US" -> en-US 402 $lang =~ s<\.[-_a-zA-Z0-9\.]*><>s; # "en_US.ISO8859-1" -> en-US 406 $lang =~ s<(?:[\.\@][-_a-zA-Z0-9]+)+$><>s; # "en_US.ISO8859-1" -> en-US 407 # it_IT.utf8@euro => it-IT 403 408 404 409 return $lang if &is_language_tag($lang); … … 530 535 $tag =~ s/^iw\b/he/i; # Hebrew 531 536 $tag =~ s/^in\b/id/i; # Indonesian 537 $tag =~ s/^cre\b/cr/i; # Cree 538 $tag =~ s/^jw\b/jv/i; # Javanese 532 539 $tag =~ s/^[ix]-lux\b/lb/i; # Luxemburger 533 540 $tag =~ s/^[ix]-navajo\b/nv/i; # Navajo 534 541 $tag =~ s/^ji\b/yi/i; # Yiddish 542 # SMB 2003 -- Hm. There's a bunch of new XXX->YY variances now, 543 # but maybe they're all so obscure I can ignore them. "Obscure" 544 # meaning either that the language is obscure, and/or that the 545 # XXX form was extant so briefly that it's unlikely it was ever 546 # used. I hope. 535 547 # 536 548 # These go FROM the simplex to complex form, to get … … 731 743 } 732 744 745 #--------------------------------------------------------------------------- 746 #--------------------------------------------------------------------------- 747 748 =item * the function implicate_supers( ...languages... ) 749 750 This takes a list of strings (which are presumed to be language-tags; 751 strings that aren't, are ignored); and after each one, this function 752 inserts super-ordinate forms that don't already appear in the list. 753 The original list, plus these insertions, is returned. 754 755 In other words, it takes this: 756 757 pt-br de-DE en-US fr pt-br-janeiro 758 759 and returns this: 760 761 pt-br pt de-DE de en-US en fr pt-br-janeiro 762 763 This function is most useful in the idiom 764 765 implicate_supers( I18N::LangTags::Detect::detect() ); 766 767 (See L<I18N::LangTags::Detect>.) 768 769 770 =item * the function implicate_supers_strictly( ...languages... ) 771 772 This works like C<implicate_supers> except that the implicated 773 forms are added to the end of the return list. 774 775 In other words, implicate_supers_strictly takes a list of strings 776 (which are presumed to be language-tags; strings that aren't, are 777 ignored) and after the whole given list, it inserts the super-ordinate forms 778 of all given tags, minus any tags that already appear in the input list. 779 780 In other words, it takes this: 781 782 pt-br de-DE en-US fr pt-br-janeiro 783 784 and returns this: 785 786 pt-br de-DE en-US fr pt-br-janeiro pt de en 787 788 The reason this function has "_strictly" in its name is that when 789 you're processing an Accept-Language list according to the RFCs, if 790 you interpret the RFCs quite strictly, then you would use 791 implicate_supers_strictly, but for normal use (i.e., common-sense use, 792 as far as I'm concerned) you'd use implicate_supers. 793 794 =cut 795 796 sub implicate_supers { 797 my @languages = grep is_language_tag($_), @_; 798 my %seen_encoded; 799 foreach my $lang (@languages) { 800 $seen_encoded{ I18N::LangTags::encode_language_tag($lang) } = 1 801 } 802 803 my(@output_languages); 804 foreach my $lang (@languages) { 805 push @output_languages, $lang; 806 foreach my $s ( I18N::LangTags::super_languages($lang) ) { 807 # Note that super_languages returns the longest first. 808 last if $seen_encoded{ I18N::LangTags::encode_language_tag($s) }; 809 push @output_languages, $s; 810 } 811 } 812 return uniq( @output_languages ); 813 814 } 815 816 sub implicate_supers_strictly { 817 my @tags = grep is_language_tag($_), @_; 818 return uniq( @_, map super_languages($_), @_ ); 819 } 820 821 822 733 823 ########################################################################### 734 824 1; … … 771 861 C<http://www.perl.com/CPAN/modules/by-module/Locale/> 772 862 773 * ISO 639, "Code for the representation of names of languages",774 C<http://www.indigo.ie/egt/standards/iso639/iso639-1-en.html>775 776 863 * ISO 639-2, "Codes for the representation of names of languages", 777 including t hree-letter codes,778 C<http:// lcweb.loc.gov/standards/iso639-2/bibcodes.html>864 including two-letter and three-letter codes, 865 C<http://www.loc.gov/standards/iso639-2/langcodes.html> 779 866 780 867 * The IANA list of registered languages (hopefully up-to-date), 781 C< ftp://ftp.isi.edu/in-notes/iana/assignments/languages/>868 C<http://www.iana.org/assignments/language-tags> 782 869 783 870 =head1 COPYRIGHT 784 871 785 Copyright (c) 1998 -2001Sean M. Burke. All rights reserved.872 Copyright (c) 1998+ Sean M. Burke. All rights reserved. 786 873 787 874 This library is free software; you can redistribute it and/or branches/release-40/extlib/I18N/LangTags/List.pm
r1098 r2594 2 2 require 5; 3 3 package I18N::LangTags::List; 4 # Time-stamp: "200 2-02-02 20:13:58 MST"4 # Time-stamp: "2004-10-06 23:26:21 ADT" 5 5 use strict; 6 use vars qw(%Name $Debug $VERSION);7 $VERSION = '0. 25';6 use vars qw(%Name %Is_Disrec $Debug $VERSION); 7 $VERSION = '0.35'; 8 8 # POD at the end. 9 9 … … 13 13 my $seeking = 1; 14 14 my $count = 0; 15 my($tag,$name); 15 my($disrec,$tag,$name); 16 my $last_name = ''; 16 17 while(<I18N::LangTags::List::DATA>) { 17 18 if($seeking) { 18 19 $seeking = 0 if m/=for woohah/; 19 } els e {20 next unless ($tag, $name) =21 m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/;20 } elsif( ($disrec, $tag, $name) = 21 m/(\[?)\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/ 22 ) { 22 23 $name =~ s/\s*[;\.]*\s*$//g; 23 24 next unless $name; 24 25 ++$count; 25 26 print "<$tag> <$name>\n" if $Debug; 26 $Name{$tag} = $name; 27 $last_name = $Name{$tag} = $name; 28 $Is_Disrec{$tag} = 1 if $disrec; 29 } elsif (m/[Ff]ormerly \"([-a-z0-9]+)\"/) { 30 $Name{$1} = "$last_name (old tag)" if $last_name; 31 $Is_Disrec{$1} = 1; 27 32 } 28 33 } … … 72 77 } 73 78 79 #-------------------------------------------------------------------------- 80 81 sub is_decent { 82 my $tag = lc($_[0] || return 0); 83 #require I18N::LangTags; 84 85 return 0 unless 86 $tag =~ 87 /^(?: # First subtag 88 [xi] | [a-z]{2,3} 89 ) 90 (?: # Subtags thereafter 91 - # separator 92 [a-z0-9]{1,8} # subtag 93 )* 94 $/xs; 95 96 my @supers = (); 97 foreach my $bit (split('-', $tag)) { 98 push @supers, 99 scalar(@supers) ? ($supers[-1] . '-' . $bit) : $bit; 100 } 101 return 0 unless @supers; 102 shift @supers if $supers[0] =~ m<^(i|x|sgn)$>s; 103 return 0 unless @supers; 104 105 foreach my $f ($tag, @supers) { 106 return 0 if $Is_Disrec{$f}; 107 return 2 if $Name{$f}; 108 # so that decent subforms of indecent tags are decent 109 } 110 return 2 if $Name{$tag}; # not only is it decent, it's known! 111 return 1; 112 } 113 114 #-------------------------------------------------------------------------- 74 115 1; 75 116 … … 103 144 104 145 The function I18N::LangTags::List::name(...) is not exported. 146 147 This module also provides a function 148 C<I18N::LangTags::List::is_decent( I<langtag> )> that returns true iff 149 the language tag is syntactically valid and is for general use (like 150 "fr" or "fr-ca", below). That is, it returns false for tags that are 151 syntactically invalid and for tags, like "aus", that are listed in 152 brackets below. This function is not exported. 105 153 106 154 The map of tags-to-names that it uses is accessable as … … 196 244 =item {ada} : Adangme 197 245 246 =item {ady} : Adyghe 247 248 eq Adygei 249 198 250 =item {aa} : Afar 199 251 … … 206 258 =item [{afa} : Afro-Asiatic (Other)] 207 259 208 =item {aka} : Akan 260 =item {ak} : Akan 261 262 (Formerly "aka".) 209 263 210 264 =item {akk} : Akkadian … … 265 319 =item {hy} : Armenian 266 320 321 =item {an} : Aragonese 322 267 323 =item [{art} : Artificial (Other)] 268 324 325 =item {ast} : Asturian 326 327 eq Bable. 328 269 329 =item {as} : Assamese 270 330 … … 277 337 =item [{map} : Austronesian (Other)] 278 338 279 =item {ava} : Avaric 339 =item {av} : Avaric 340 341 (Formerly "ava".) 280 342 281 343 =item {ae} : Avestan … … 291 353 eq Azeri 292 354 355 Notable forms: 356 {az-Arab} Azerbaijani in Arabic script; 357 {az-Cyrl} Azerbaijani in Cyrillic script; 358 {az-Latn} Azerbaijani in Latin script. 359 293 360 =item {ban} : Balinese 294 361 … … 297 364 =item {bal} : Baluchi 298 365 299 =item {bam} : Bambara 366 =item {bm} : Bambara 367 368 (Formerly "bam".) 300 369 301 370 =item [{bai} : Bamileke languages] … … 404 473 405 474 Many forms are mutually un-intelligible in spoken media. 406 Notable subforms: 475 Notable forms: 476 {zh-Hans} Chinese, in simplified script; 477 {zh-Hant} Chinese, in traditional script; 478 {zh-tw} Taiwan Chinese; 407 479 {zh-cn} PRC Chinese; 480 {zh-sg} Singapore Chinese; 481 {zh-mo} Macau Chinese; 408 482 {zh-hk} Hong Kong Chinese; 409 {zh-mo} Macau Chinese;410 {zh-sg} Singapore Chinese;411 {zh-tw} Taiwan Chinese;412 483 {zh-guoyu} Mandarin [Putonghua/Guoyu]; 413 {zh-hakka} Hakka [formerly i-hakka];484 {zh-hakka} Hakka [formerly "i-hakka"]; 414 485 {zh-min} Hokkien; 415 486 {zh-min-nan} Southern Hokkien; … … 448 519 eq Corse. 449 520 450 =item {cr e} : Cree451 452 NOT Creek! 521 =item {cr} : Cree 522 523 NOT Creek! (Formerly "cre".) 453 524 454 525 =item {mus} : Creek … … 477 548 478 549 =item {da} : Danish 550 551 =item {dar} : Dargwa 479 552 480 553 =item {day} : Dayak … … 492 565 =item {din} : Dinka 493 566 494 =item {div} : Divehi 567 =item {dv} : Divehi 568 569 eq Maldivian. (Formerly "div".) 495 570 496 571 =item {doi} : Dogri … … 556 631 eq Anglo-Saxon. (Historical) 557 632 633 =item {i-enochian} : Enochian (Artificial) 634 635 =item {myv} : Erzya 636 558 637 =item {eo} : Esperanto 559 638 … … 562 641 =item {et} : Estonian 563 642 564 =item {ewe} : Ewe 643 =item {ee} : Ewe 644 645 (Formerly "ewe".) 565 646 566 647 =item {ewo} : Ewondo … … 604 685 =item {fur} : Friulian 605 686 606 =item {ful} : Fulah 687 =item {ff} : Fulah 688 689 (Formerly "ful".) 607 690 608 691 =item {gaa} : Ga … … 616 699 eq Galician 617 700 618 =item {lug} : Ganda 701 =item {lg} : Ganda 702 703 (Formerly "lug".) 619 704 620 705 =item {gay} : Gayo … … 680 765 =item {hai} : Haida 681 766 767 =item {ht} : Haitian 768 769 eq Haitian Creole 770 682 771 =item {ha} : Hausa 683 772 … … 717 806 =item {is} : Icelandic 718 807 719 =item {ibo} : Igbo 808 =item {io} : Ido 809 810 (Artificial) 811 812 =item {ig} : Igbo 813 814 (Formerly "ibo".) 720 815 721 816 =item {ijo} : Ijo … … 733 828 =for etc 734 829 {in} Indonesian (old tag) 830 831 =item {inh} : Ingush 735 832 736 833 =item {ia} : Interlingua (International Auxiliary Language Association) … … 774 871 (NOT "jp"!) 775 872 776 =item {jw} : Javanese 873 =item {jv} : Javanese 874 875 (Formerly "jw" because of a typo.) 777 876 778 877 =item {jrb} : Judeo-Arabic … … 780 879 =item {jpr} : Judeo-Persian 781 880 881 =item {kbd} : Kabardian 882 782 883 =item {kab} : Kabyle 783 884 … … 788 889 eq Greenlandic "Eskimo" 789 890 891 =item {xal} : Kalmyk 892 790 893 =item {kam} : Kamba 791 894 … … 794 897 eq Kanarese. NOT Canadian! 795 898 796 =item {kau} : Kanuri 899 =item {kr} : Kanuri 900 901 (Formerly "kau".) 902 903 =item {krc} : Karachay-Balkar 797 904 798 905 =item {kaa} : Kara-Kalpak … … 802 909 =item {ks} : Kashmiri 803 910 911 =item {csb} : Kashubian 912 913 eq Kashub 914 804 915 =item {kaw} : Kawi 805 916 … … 830 941 =item {kv} : Komi 831 942 832 =item {kon} : Kongo 943 =item {kg} : Kongo 944 945 (Formerly "kon".) 833 946 834 947 =item {kok} : Konkani … … 878 991 =item {lb} : Letzeburgesch 879 992 880 eq Luxemburgian, eq Luxemburger. (Formerly i-lux.)993 eq Luxemburgian, eq Luxemburger. (Formerly "i-lux".) 881 994 882 995 =for etc … … 885 998 =item {lez} : Lezghian 886 999 1000 =item {li} : Limburgish 1001 1002 eq Limburger, eq Limburgan. NOT Letzeburgesch! 1003 887 1004 =item {ln} : Lingala 888 1005 … … 893 1010 eq Low Saxon. eq Low German. eq Low Saxon. 894 1011 1012 =item {art-lojban} : Lojban (Artificial) 1013 895 1014 =item {loz} : Lozi 896 1015 897 =item {lub} : Luba-Katanga 1016 =item {lu} : Luba-Katanga 1017 1018 (Formerly "lub".) 898 1019 899 1020 =item {lua} : Luba-Lulua … … 986 1107 =item {moh} : Mohawk 987 1108 1109 =item {mdf} : Moksha 1110 988 1111 =item {mo} : Moldavian 989 1112 … … 1008 1131 =item {nah} : Nahuatl 1009 1132 1133 =item {nap} : Neapolitan 1134 1010 1135 =item {na} : Nauru 1011 1136 1012 1137 =item {nv} : Navajo 1013 1138 1014 eq Navaho. (Formerly i-navajo.)1139 eq Navaho. (Formerly "i-navajo".) 1015 1140 1016 1141 =for etc … … 1039 1164 =item {niu} : Niuean 1040 1165 1166 =item {nog} : Nogai 1167 1041 1168 =item {non} : Old Norse 1042 1169 … … 1047 1174 Do not use this. 1048 1175 1049 =item {se} : Northern Sami1050 1051 eq Lappish. eq Lapp. eq (Northern) Saami.1052 1053 1176 =item {no} : Norwegian 1054 1177 … … 1057 1180 =item {nb} : Norwegian Bokmal 1058 1181 1059 eq BokmE<aring>l, (A form of Norwegian.) (Formerly no-bok.)1182 eq BokmE<aring>l, (A form of Norwegian.) (Formerly "no-bok".) 1060 1183 1061 1184 =for etc … … 1064 1187 =item {nn} : Norwegian Nynorsk 1065 1188 1066 (A form of Norwegian.) (Formerly no-nyn.)1189 (A form of Norwegian.) (Formerly "no-nyn".) 1067 1190 1068 1191 =for etc … … 1083 1206 eq ProvenE<ccedil>al, eq Provencal 1084 1207 1085 =item {oj i} : Ojibwa1086 1087 eq Ojibwe. 1208 =item {oj} : Ojibwa 1209 1210 eq Ojibwe. (Formerly "oji".) 1088 1211 1089 1212 =item {or} : Oriya … … 1203 1326 NOT Aramaic! 1204 1327 1328 =item {se} : Northern Sami 1329 1330 eq Lappish. eq Lapp. eq (Northern) Saami. 1331 1332 =item {sma} : Southern Sami 1333 1334 =item {smn} : Inari Sami 1335 1336 =item {smj} : Lule Sami 1337 1338 =item {sms} : Skolt Sami 1339 1205 1340 =item [{smi} : Sami languages (Other)] 1206 1341 … … 1234 1369 1235 1370 eq Serb. NOT Sorbian. 1371 1372 Notable forms: 1373 {sr-Cyrl} : Serbian in Cyrillic script; 1374 {sr-Latn} : Serbian in Latin script. 1236 1375 1237 1376 =item {srr} : Serer … … 1250 1389 {sgn-ni} Nicaraguan Sign Language (ISN); 1251 1390 {sgn-us} American Sign Language (ASL). 1391 1392 (And so on with other country codes as the subtag.) 1252 1393 1253 1394 =item {bla} : Siksika … … 1423 1564 =item {tum} : Tumbuka 1424 1565 1566 =item [{tup} : Tupi languages] 1567 1425 1568 =item {tr} : Turkish 1426 1569 … … 1431 1574 (Typically in Arabic script) (Historical) 1432 1575 1576 =item {crh} : Crimean Turkish 1577 1578 eq Crimean Tatar 1579 1433 1580 =item {tk} : Turkmen 1434 1581 … … 1443 1590 =item {tw} : Twi 1444 1591 1592 =item {udm} : Udmurt 1593 1445 1594 =item {uga} : Ugaritic 1446 1595 … … 1463 1612 eq E<Ouml>zbek 1464 1613 1614 Notable forms: 1615 {uz-Cyrl} Uzbek in Cyrillic script; 1616 {uz-Latn} Uzbek in Latin script. 1617 1465 1618 =item {vai} : Vai 1466 1619 1467 =item {ve n} : Venda1468 1469 NOT Wendish! NOT Wend! NOT Avestan! 1620 =item {ve} : Venda 1621 1622 NOT Wendish! NOT Wend! NOT Avestan! (Formerly "ven".) 1470 1623 1471 1624 =item {vi} : Vietnamese … … 1482 1635 1483 1636 =item [{wak} : Wakashan languages] 1637 1638 =item {wa} : Walloon 1484 1639 1485 1640 =item {wal} : Walamo … … 1518 1673 eq Yap 1519 1674 1675 =item {ii} : Sichuan Yi 1676 1520 1677 =item {yi} : Yiddish 1521 1678 1522 Formerly "ji". Sometimes in Roman script, sometimesin Hebrew script.1523 1524 =for etc 1525 { ji} Yiddish (old tag)1679 Formerly "ji". Usually in Hebrew script. 1680 1681 Notable forms: 1682 {yi-latn} Yiddish in Latin script 1526 1683 1527 1684 =item {yo} : Yoruba … … 1559 1716 =head1 COPYRIGHT AND DISCLAIMER 1560 1717 1561 Copyright (c) 2001 ,2002Sean M. Burke. All rights reserved.1718 Copyright (c) 2001+ Sean M. Burke. All rights reserved. 1562 1719 1563 1720 You can redistribute and/or branches/release-40/extlib/Locale/Maketext.pm
r1098 r2594 1 2 # Time-stamp: "2001-06-21 23:09:33 MDT"3 4 require 5;5 1 package Locale::Maketext; 6 2 use strict; 7 3 use vars qw( @ISA $VERSION $MATCH_SUPERS $USING_LANGUAGE_TAGS 8 $USE_LITERALS);4 $USE_LITERALS $MATCH_SUPERS_TIGHTLY); 9 5 use Carp (); 10 use I18N::LangTags 0. 21();6 use I18N::LangTags 0.30 (); 11 7 12 8 #-------------------------------------------------------------------------- 13 9 14 10 BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } } 15 # define the constant 'DEBUG' at compile-time16 17 $VERSION = "1.03";11 # define the constant 'DEBUG' at compile-time 12 13 $VERSION = '1.13'; 18 14 @ISA = (); 19 15 20 16 $MATCH_SUPERS = 1; 21 $USING_LANGUAGE_TAGS = 1; 22 # Turning this off is somewhat of a security risk in that little or no 23 # checking will be done on the legality of tokens passed to the 24 # eval("use $module_name") in _try_use. If you turn this off, you have 25 # to do your own taint checking. 17 $MATCH_SUPERS_TIGHTLY = 1; 18 $USING_LANGUAGE_TAGS = 1; 19 # Turning this off is somewhat of a security risk in that little or no 20 # checking will be done on the legality of tokens passed to the 21 # eval("use $module_name") in _try_use. If you turn this off, you have 22 # to do your own taint checking. 26 23 27 24 $USE_LITERALS = 1 unless defined $USE_LITERALS; 28 # a hint for compiling bracket-notation things.25 # a hint for compiling bracket-notation things. 29 26 30 27 my %isa_scan = (); … … 33 30 34 31 sub quant { 35 my($handle, $num, @forms) = @_;36 37 return $num if @forms == 0; # what should this mean?38 return $forms[2] if @forms > 2 and $num == 0; # special zeroth case39 40 # Normal case:41 # Note that the formatting of $num is preserved.42 return( $handle->numf($num) . ' ' . $handle->numerate($num, @forms) );43 # Most human languages put the number phrase before the qualified phrase.32 my($handle, $num, @forms) = @_; 33 34 return $num if @forms == 0; # what should this mean? 35 return $forms[2] if @forms > 2 and $num == 0; # special zeroth case 36 37 # Normal case: 38 # Note that the formatting of $num is preserved. 39 return( $handle->numf($num) . ' ' . $handle->numerate($num, @forms) ); 40 # Most human languages put the number phrase before the qualified phrase. 44 41 } 45 42 46 43 47 44 sub numerate { 48 # return this lexical item in a form appropriate to this number 49 my($handle, $num, @forms) = @_; 50 my $s = ($num == 1); 51 52 return '' unless @forms; 53 if(@forms == 1) { # only the headword form specified 54 return $s ? $forms[0] : ($forms[0] . 's'); # very cheap hack. 55 } else { # sing and plural were specified 56 return $s ? $forms[0] : $forms[1]; 57 } 45 # return this lexical item in a form appropriate to this number 46 my($handle, $num, @forms) = @_; 47 my $s = ($num == 1); 48 49 return '' unless @forms; 50 if(@forms == 1) { # only the headword form specified 51 return $s ? $forms[0] : ($forms[0] . 's'); # very cheap hack. 52 } 53 else { # sing and plural were specified 54 return $s ? $forms[0] : $forms[1]; 55 } 58 56 } 59 57 … … 61 59 62 60 sub numf { 63 my($handle, $num) = @_[0,1]; 64 if($num < 10_000_000_000 and $num > -10_000_000_000 and $num == int($num)) { 65 $num += 0; # Just use normal integer stringification. 66 # Specifically, don't let %G turn ten million into 1E+007 67 } else { 68 $num = CORE::sprintf("%G", $num); 69 # "CORE::" is there to avoid confusion with the above sub sprintf. 70 } 71 while( $num =~ s/^([-+]?\d+)(\d{3})/$1,$2/s ) {1} # right from perlfaq5 72 # The initial \d+ gobbles as many digits as it can, and then we 73 # backtrack so it un-eats the rightmost three, and then we 74 # insert the comma there. 75 76 $num =~ tr<.,><,.> if ref($handle) and $handle->{'numf_comma'}; 77 # This is just a lame hack instead of using Number::Format 78 return $num; 61 my($handle, $num) = @_[0,1]; 62 if($num < 10_000_000_000 and $num > -10_000_000_000 and $num == int($num)) { 63 $num += 0; # Just use normal integer stringification. 64 # Specifically, don't let %G turn ten million into 1E+007 65 } 66 else { 67 $num = CORE::sprintf('%G', $num); 68 # "CORE::" is there to avoid confusion with the above sub sprintf. 69 } 70 while( $num =~ s/^([-+]?\d+)(\d{3})/$1,$2/s ) {1} # right from perlfaq5 71 # The initial \d+ gobbles as many digits as it can, and then we 72 # backtrack so it un-eats the rightmost three, and then we 73 # insert the comma there. 74 75 $num =~ tr<.,><,.> if ref($handle) and $handle->{'numf_comma'}; 76 # This is just a lame hack instead of using Number::Format 77 return $num; 79 78 } 80 79 81 80 sub sprintf { 82 no integer;83 my($handle, $format, @params) = @_;84 return CORE::sprintf($format, @params);81 no integer; 82 my($handle, $format, @params) = @_; 83 return CORE::sprintf($format, @params); 85 84 # "CORE::" is there to avoid confusion with myself! 86 85 } … … 91 90 92 91 sub language_tag { 93 my $it = ref($_[0]) || $_[0];94 return undef unless $it =~ m/([^':]+)(?:::)?$/s;95 $it = lc($1);96 $it =~ tr<_><->;97 return $it;92 my $it = ref($_[0]) || $_[0]; 93 return undef unless $it =~ m/([^':]+)(?:::)?$/s; 94 $it = lc($1); 95 $it =~ tr<_><->; 96 return $it; 98 97 } 99 98 100 99 sub encoding { 101 my $it = $_[0];102 return(103 (ref($it) && $it->{'encoding'})104 || "iso-8859-1"# Latin-1105 );106 } 100 my $it = $_[0]; 101 return( 102 (ref($it) && $it->{'encoding'}) 103 || 'iso-8859-1' # Latin-1 104 ); 105 } 107 106 108 107 #-------------------------------------------------------------------------- … … 115 114 116 115 sub fail_with { # an actual attribute method! 117 my($handle, @params) = @_;118 return unless ref($handle);119 $handle->{'fail'} = $params[0] if @params;120 return $handle->{'fail'};116 my($handle, @params) = @_; 117 return unless ref($handle); 118 $handle->{'fail'} = $params[0] if @params; 119 return $handle->{'fail'}; 121 120 } 122 121 … … 124 123 125 124 sub failure_handler_auto { 126 # Meant to be used like: 127 # $handle->fail_with('failure_handler_auto') 128 129 my($handle, $phrase, @params) = @_; 130 $handle->{'failure_lex'} ||= {}; 131 my $lex = $handle->{'failure_lex'}; 132 133 my $value; 134 $lex->{$phrase} ||= ($value = $handle->_compile($phrase)); 135 136 # Dumbly copied from sub maketext: 137 { 138 local $SIG{'__DIE__'}; 139 eval { $value = &$value($handle, @_) }; 140 } 141 # If we make it here, there was an exception thrown in the 142 # call to $value, and so scream: 143 if($@) { 144 my $err = $@; 145 # pretty up the error message 146 $err =~ s<\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?> 147 <\n in bracket code [compiled line $1],>s; 148 #$err =~ s/\n?$/\n/s; 149 Carp::croak "Error in maketexting \"$phrase\":\n$err as used"; 150 # Rather unexpected, but suppose that the sub tried calling 151 # a method that didn't exist. 152 } else { 153 return $value; 154 } 125 # Meant to be used like: 126 # $handle->fail_with('failure_handler_auto') 127 128 my $handle = shift; 129 my $phrase = shift; 130 131 $handle->{'failure_lex'} ||= {}; 132 my $lex = $handle->{'failure_lex'}; 133 134 my $value; 135 $lex->{$phrase} ||= ($value = $handle->_compile($phrase)); 136 137 # Dumbly copied from sub maketext: 138 return ${$value} if ref($value) eq 'SCALAR'; 139 return $value if ref($value) ne 'CODE'; 140 { 141 local $SIG{'__DIE__'}; 142 eval { $value = &$value($handle, @_) }; 143 } 144 # If we make it here, there was an exception thrown in the 145 # call to $value, and so scream: 146 if($@) { 147 my $err = $@; 148 # pretty up the error message 149 $err =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?} 150 {\n in bracket code [compiled line $1],}s; 151 #$err =~ s/\n?$/\n/s; 152 Carp::croak "Error in maketexting \"$phrase\":\n$err as used"; 153 # Rather unexpected, but suppose that the sub tried calling 154 # a method that didn't exist. 155 } 156 else { 157 return $value; 158 } 155 159 } 156 160 … … 158 162 159 163 sub new { 160 # Nothing fancy!161 my $class = ref($_[0]) || $_[0];162 my $handle = bless {}, $class;163 $handle->init;164 return $handle;164 # Nothing fancy! 165 my $class = ref($_[0]) || $_[0]; 166 my $handle = bless {}, $class; 167 $handle->init; 168 return $handle; 165 169 } 166 170 … … 170 174 171 175 sub maketext { 172 # Remember, this can fail. Failure is controllable many ways. 173 Carp::croak "maketext requires at least one parameter" unless @_ > 1; 174 175 my($handle, $phrase) = splice(@_,0,2); 176 177 # Look up the value: 178 179 my $value; 180 foreach my $h_r ( 181 @{ $isa_scan{ref($handle) || $handle} || $handle->_lex_refs } 182 ) { 183 print "* Looking up \"$phrase\" in $h_r\n" if DEBUG; 184 if(exists $h_r->{$phrase}) { 185 print " Found \"$phrase\" in $h_r\n" if DEBUG; 186 unless(ref($value = $h_r->{$phrase})) { 187 # Nonref means it's not yet compiled. Compile and replace. 188 $value = $h_r->{$phrase} = $handle->_compile($value); 189 } 190 last; 191 } elsif($phrase !~ m/^_/s and $h_r->{'_AUTO'}) { 192 # it's an auto lex, and this is an autoable key! 193 print " Automaking \"$phrase\" into $h_r\n" if DEBUG; 194 195 $value = $h_r->{$phrase} = $handle->_compile($phrase); 196 last; 197 } 198 print " Not found in $h_r, nor automakable\n" if DEBUG > 1; 199 # else keep looking 200 } 201 202 unless(defined($value)) { 203 print "! Lookup of \"$phrase\" in/under ", ref($handle) || $handle, 204 " fails.\n" if DEBUG; 205 if(ref($handle) and $handle->{'fail'}) { 206 print "WARNING0: maketext fails looking for <$phrase>\n" if DEBUG; 207 my $fail; 208 if(ref($fail = $handle->{'fail'}) eq 'CODE') { # it's a sub reference 209 return &{$fail}($handle, $phrase, @_); 210 # If it ever returns, it should return a good value. 211 } else { # It's a method name 212 return $handle->$fail($phrase, @_); 213 # If it ever returns, it should return a good value. 214 } 215 } else { 216 # All we know how to do is this; 217 Carp::croak("maketext doesn't know how to say:\n$phrase\nas needed"); 218 } 219 } 220 221 return $$value if ref($value) eq 'SCALAR'; 222 return $value unless ref($value) eq 'CODE'; 223 224 { 225 local $SIG{'__DIE__'}; 226 eval { $value = &$value($handle, @_) }; 227 } 228 # If we make it here, there was an exception thrown in the 229 # call to $value, and so scream: 230 if($@) { 231 my $err = $@; 232 # pretty up the error message 233 $err =~ s<\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?> 234 <\n in bracket code [compiled line $1],>s; 235 #$err =~ s/\n?$/\n/s; 236 Carp::croak "Error in maketexting \"$phrase\":\n$err as used"; 237 # Rather unexpected, but suppose that the sub tried calling 238 # a method that didn't exist. 239 } else { 240 return $value; 241 } 176 # Remember, this can fail. Failure is controllable many ways. 177 Carp::croak 'maketext requires at least one parameter' unless @_ > 1; 178 179 my($handle, $phrase) = splice(@_,0,2); 180 Carp::confess('No handle/phrase') unless (defined($handle) && defined($phrase)); 181 182 183 # Don't interefere with $@ in case that's being interpolated into the msg. 184 local $@; 185 186 # Look up the value: 187 188 my $value; 189 foreach my $h_r ( 190 @{ $isa_scan{ref($handle) || $handle} || $handle->_lex_refs } 191 ) { 192 DEBUG and warn "* Looking up \"$phrase\" in $h_r\n"; 193 if(exists $h_r->{$phrase}) { 194 DEBUG and warn " Found \"$phrase\" in $h_r\n"; 195 unless(ref($value = $h_r->{$phrase})) { 196 # Nonref means it's not yet compiled. Compile and replace. 197 $value = $h_r->{$phrase} = $handle->_compile($value); 198 } 199 last; 200 } 201 elsif($phrase !~ m/^_/s and $h_r->{'_AUTO'}) { 202 # it's an auto lex, and this is an autoable key! 203 DEBUG and warn " Automaking \"$phrase\" into $h_r\n"; 204 205 $value = $h_r->{$phrase} = $handle->_compile($phrase); 206 last; 207 } 208 DEBUG>1 and print " Not found in $h_r, nor automakable\n"; 209 # else keep looking 210 } 211 212 unless(defined($value)) { 213 DEBUG and warn "! Lookup of \"$phrase\" in/under ", ref($handle) || $handle, " fails.\n"; 214 if(ref($handle) and $handle->{'fail'}) { 215 DEBUG and warn "WARNING0: maketext fails looking for <$phrase>\n"; 216 my $fail; 217 if(ref($fail = $handle->{'fail'}) eq 'CODE') { # it's a sub reference 218 return &{$fail}($handle, $phrase, @_); 219 # If it ever returns, it should return a good value. 220 } 221 else { # It's a method name 222 return $handle->$fail($phrase, @_); 223 # If it ever returns, it should return a good value. 224 } 225 } 226 else { 227 # All we know how to do is this; 228 Carp::croak("maketext doesn't know how to say:\n$phrase\nas needed"); 229 } 230 } 231 232 return $$value if ref($value) eq 'SCALAR'; 233 return $value unless ref($value) eq 'CODE'; 234 235 { 236 local $SIG{'__DIE__'}; 237 eval { $value = &$value($handle, @_) }; 238 } 239 # If we make it here, there was an exception thrown in the 240 # call to $value, and so scream: 241 if ($@) { 242 my $err = $@; 243 # pretty up the error message 244 $err =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?} 245 {\n in bracket code [compiled line $1],}s; 246 #$err =~ s/\n?$/\n/s; 247 Carp::croak "Error in maketexting \"$phrase\":\n$err as used"; 248 # Rather unexpected, but suppose that the sub tried calling 249 # a method that didn't exist. 250 } 251 else { 252 return $value; 253 } 242 254 } 243 255 … … 245 257 246 258 sub get_handle { # This is a constructor and, yes, it CAN FAIL. 247 # Its class argument has to be the base class for the current 248 # application's l10n files. 249 my($base_class, @languages) = @_; 250 $base_class = ref($base_class) || $base_class; 251 # Complain if they use __PACKAGE__ as a project base class? 252 253 unless(@languages) { # Calling with no args is magical! wooo, magic! 254 if(length( $ENV{'REQUEST_METHOD'} || '' )) { # I'm a CGI 255 my $in = $ENV{'HTTP_ACCEPT_LANGUAGE'} || ''; 256 # supposedly that works under mod_perl, too. 257 $in =~ s<\([\)]*\)><>g; # Kill parens'd things -- just a hack. 258 @languages = &I18N::LangTags::extract_language_tags($in) if length $in; 259 # ...which untaints, incidentally. 260 261 } else { # Not running as a CGI: try to puzzle out from the environment 262 if(length( $ENV{'LANG'} || '' )) { 263 push @languages, split m/[,:]/, $ENV{'LANG'}; 264 # LANG can be only /one/ locale as far as I know, but what the hey. 265 } 266 if(length( $ENV{'LANGUAGE'} || '' )) { 267 push @languages, split m/[,:]/, $ENV{'LANGUAGE'}; 268 } 269 print "Noting ENV LANG ", join(',', @languages),"\n" if DEBUG; 270 # Those are really locale IDs, but they get xlated a few lines down. 271 272 if(&_try_use('Win32::Locale')) { 273 # If we have that module installed... 274 push @languages, Win32::Locale::get_language() 275 if defined &Win32::Locale::get_language; 276 } 277 } 278 } 279 280 #------------------------------------------------------------------------ 281 print "Lgs1: ", map("<$_>", @languages), "\n" if DEBUG; 282 283 if($USING_LANGUAGE_TAGS) { 284 @languages = map &I18N::LangTags::locale2language_tag($_), @languages; 285 # if it's a lg tag, fine, pass thru (untainted) 286 # if it's a locale ID, try converting to a lg tag (untainted), 287 # oth
