Changeset 2594 for branches/release-40/extlib/I18N/LangTags/List.pm
- Timestamp:
- 06/18/08 02:03:47 (18 months ago)
- Files:
-
- 1 modified
-
branches/release-40/extlib/I18N/LangTags/List.pm (modified) (47 diffs)
Legend:
- Unmodified
- Added
- Removed
-
branches/release-40/extlib/I18N/LangTags/List.pm
r1098 r2594 2 2 require 5; 3 3 package I18N::LangTags::List; 4 # Time-stamp: "200 2-02-02 20:13:58 MST"4 # Time-stamp: "2004-10-06 23:26:21 ADT" 5 5 use strict; 6 use vars qw(%Name $Debug $VERSION);7 $VERSION = '0. 25';6 use vars qw(%Name %Is_Disrec $Debug $VERSION); 7 $VERSION = '0.35'; 8 8 # POD at the end. 9 9 … … 13 13 my $seeking = 1; 14 14 my $count = 0; 15 my($tag,$name); 15 my($disrec,$tag,$name); 16 my $last_name = ''; 16 17 while(<I18N::LangTags::List::DATA>) { 17 18 if($seeking) { 18 19 $seeking = 0 if m/=for woohah/; 19 } els e {20 next unless ($tag, $name) =21 m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/;20 } elsif( ($disrec, $tag, $name) = 21 m/(\[?)\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/ 22 ) { 22 23 $name =~ s/\s*[;\.]*\s*$//g; 23 24 next unless $name; 24 25 ++$count; 25 26 print "<$tag> <$name>\n" if $Debug; 26 $Name{$tag} = $name; 27 $last_name = $Name{$tag} = $name; 28 $Is_Disrec{$tag} = 1 if $disrec; 29 } elsif (m/[Ff]ormerly \"([-a-z0-9]+)\"/) { 30 $Name{$1} = "$last_name (old tag)" if $last_name; 31 $Is_Disrec{$1} = 1; 27 32 } 28 33 } … … 72 77 } 73 78 79 #-------------------------------------------------------------------------- 80 81 sub is_decent { 82 my $tag = lc($_[0] || return 0); 83 #require I18N::LangTags; 84 85 return 0 unless 86 $tag =~ 87 /^(?: # First subtag 88 [xi] | [a-z]{2,3} 89 ) 90 (?: # Subtags thereafter 91 - # separator 92 [a-z0-9]{1,8} # subtag 93 )* 94 $/xs; 95 96 my @supers = (); 97 foreach my $bit (split('-', $tag)) { 98 push @supers, 99 scalar(@supers) ? ($supers[-1] . '-' . $bit) : $bit; 100 } 101 return 0 unless @supers; 102 shift @supers if $supers[0] =~ m<^(i|x|sgn)$>s; 103 return 0 unless @supers; 104 105 foreach my $f ($tag, @supers) { 106 return 0 if $Is_Disrec{$f}; 107 return 2 if $Name{$f}; 108 # so that decent subforms of indecent tags are decent 109 } 110 return 2 if $Name{$tag}; # not only is it decent, it's known! 111 return 1; 112 } 113 114 #-------------------------------------------------------------------------- 74 115 1; 75 116 … … 103 144 104 145 The function I18N::LangTags::List::name(...) is not exported. 146 147 This module also provides a function 148 C<I18N::LangTags::List::is_decent( I<langtag> )> that returns true iff 149 the language tag is syntactically valid and is for general use (like 150 "fr" or "fr-ca", below). That is, it returns false for tags that are 151 syntactically invalid and for tags, like "aus", that are listed in 152 brackets below. This function is not exported. 105 153 106 154 The map of tags-to-names that it uses is accessable as … … 196 244 =item {ada} : Adangme 197 245 246 =item {ady} : Adyghe 247 248 eq Adygei 249 198 250 =item {aa} : Afar 199 251 … … 206 258 =item [{afa} : Afro-Asiatic (Other)] 207 259 208 =item {aka} : Akan 260 =item {ak} : Akan 261 262 (Formerly "aka".) 209 263 210 264 =item {akk} : Akkadian … … 265 319 =item {hy} : Armenian 266 320 321 =item {an} : Aragonese 322 267 323 =item [{art} : Artificial (Other)] 268 324 325 =item {ast} : Asturian 326 327 eq Bable. 328 269 329 =item {as} : Assamese 270 330 … … 277 337 =item [{map} : Austronesian (Other)] 278 338 279 =item {ava} : Avaric 339 =item {av} : Avaric 340 341 (Formerly "ava".) 280 342 281 343 =item {ae} : Avestan … … 291 353 eq Azeri 292 354 355 Notable forms: 356 {az-Arab} Azerbaijani in Arabic script; 357 {az-Cyrl} Azerbaijani in Cyrillic script; 358 {az-Latn} Azerbaijani in Latin script. 359 293 360 =item {ban} : Balinese 294 361 … … 297 364 =item {bal} : Baluchi 298 365 299 =item {bam} : Bambara 366 =item {bm} : Bambara 367 368 (Formerly "bam".) 300 369 301 370 =item [{bai} : Bamileke languages] … … 404 473 405 474 Many forms are mutually un-intelligible in spoken media. 406 Notable subforms: 475 Notable forms: 476 {zh-Hans} Chinese, in simplified script; 477 {zh-Hant} Chinese, in traditional script; 478 {zh-tw} Taiwan Chinese; 407 479 {zh-cn} PRC Chinese; 480 {zh-sg} Singapore Chinese; 481 {zh-mo} Macau Chinese; 408 482 {zh-hk} Hong Kong Chinese; 409 {zh-mo} Macau Chinese;410 {zh-sg} Singapore Chinese;411 {zh-tw} Taiwan Chinese;412 483 {zh-guoyu} Mandarin [Putonghua/Guoyu]; 413 {zh-hakka} Hakka [formerly i-hakka];484 {zh-hakka} Hakka [formerly "i-hakka"]; 414 485 {zh-min} Hokkien; 415 486 {zh-min-nan} Southern Hokkien; … … 448 519 eq Corse. 449 520 450 =item {cr e} : Cree451 452 NOT Creek! 521 =item {cr} : Cree 522 523 NOT Creek! (Formerly "cre".) 453 524 454 525 =item {mus} : Creek … … 477 548 478 549 =item {da} : Danish 550 551 =item {dar} : Dargwa 479 552 480 553 =item {day} : Dayak … … 492 565 =item {din} : Dinka 493 566 494 =item {div} : Divehi 567 =item {dv} : Divehi 568 569 eq Maldivian. (Formerly "div".) 495 570 496 571 =item {doi} : Dogri … … 556 631 eq Anglo-Saxon. (Historical) 557 632 633 =item {i-enochian} : Enochian (Artificial) 634 635 =item {myv} : Erzya 636 558 637 =item {eo} : Esperanto 559 638 … … 562 641 =item {et} : Estonian 563 642 564 =item {ewe} : Ewe 643 =item {ee} : Ewe 644 645 (Formerly "ewe".) 565 646 566 647 =item {ewo} : Ewondo … … 604 685 =item {fur} : Friulian 605 686 606 =item {ful} : Fulah 687 =item {ff} : Fulah 688 689 (Formerly "ful".) 607 690 608 691 =item {gaa} : Ga … … 616 699 eq Galician 617 700 618 =item {lug} : Ganda 701 =item {lg} : Ganda 702 703 (Formerly "lug".) 619 704 620 705 =item {gay} : Gayo … … 680 765 =item {hai} : Haida 681 766 767 =item {ht} : Haitian 768 769 eq Haitian Creole 770 682 771 =item {ha} : Hausa 683 772 … … 717 806 =item {is} : Icelandic 718 807 719 =item {ibo} : Igbo 808 =item {io} : Ido 809 810 (Artificial) 811 812 =item {ig} : Igbo 813 814 (Formerly "ibo".) 720 815 721 816 =item {ijo} : Ijo … … 733 828 =for etc 734 829 {in} Indonesian (old tag) 830 831 =item {inh} : Ingush 735 832 736 833 =item {ia} : Interlingua (International Auxiliary Language Association) … … 774 871 (NOT "jp"!) 775 872 776 =item {jw} : Javanese 873 =item {jv} : Javanese 874 875 (Formerly "jw" because of a typo.) 777 876 778 877 =item {jrb} : Judeo-Arabic … … 780 879 =item {jpr} : Judeo-Persian 781 880 881 =item {kbd} : Kabardian 882 782 883 =item {kab} : Kabyle 783 884 … … 788 889 eq Greenlandic "Eskimo" 789 890 891 =item {xal} : Kalmyk 892 790 893 =item {kam} : Kamba 791 894 … … 794 897 eq Kanarese. NOT Canadian! 795 898 796 =item {kau} : Kanuri 899 =item {kr} : Kanuri 900 901 (Formerly "kau".) 902 903 =item {krc} : Karachay-Balkar 797 904 798 905 =item {kaa} : Kara-Kalpak … … 802 909 =item {ks} : Kashmiri 803 910 911 =item {csb} : Kashubian 912 913 eq Kashub 914 804 915 =item {kaw} : Kawi 805 916 … … 830 941 =item {kv} : Komi 831 942 832 =item {kon} : Kongo 943 =item {kg} : Kongo 944 945 (Formerly "kon".) 833 946 834 947 =item {kok} : Konkani … … 878 991 =item {lb} : Letzeburgesch 879 992 880 eq Luxemburgian, eq Luxemburger. (Formerly i-lux.)993 eq Luxemburgian, eq Luxemburger. (Formerly "i-lux".) 881 994 882 995 =for etc … … 885 998 =item {lez} : Lezghian 886 999 1000 =item {li} : Limburgish 1001 1002 eq Limburger, eq Limburgan. NOT Letzeburgesch! 1003 887 1004 =item {ln} : Lingala 888 1005 … … 893 1010 eq Low Saxon. eq Low German. eq Low Saxon. 894 1011 1012 =item {art-lojban} : Lojban (Artificial) 1013 895 1014 =item {loz} : Lozi 896 1015 897 =item {lub} : Luba-Katanga 1016 =item {lu} : Luba-Katanga 1017 1018 (Formerly "lub".) 898 1019 899 1020 =item {lua} : Luba-Lulua … … 986 1107 =item {moh} : Mohawk 987 1108 1109 =item {mdf} : Moksha 1110 988 1111 =item {mo} : Moldavian 989 1112 … … 1008 1131 =item {nah} : Nahuatl 1009 1132 1133 =item {nap} : Neapolitan 1134 1010 1135 =item {na} : Nauru 1011 1136 1012 1137 =item {nv} : Navajo 1013 1138 1014 eq Navaho. (Formerly i-navajo.)1139 eq Navaho. (Formerly "i-navajo".) 1015 1140 1016 1141 =for etc … … 1039 1164 =item {niu} : Niuean 1040 1165 1166 =item {nog} : Nogai 1167 1041 1168 =item {non} : Old Norse 1042 1169 … … 1047 1174 Do not use this. 1048 1175 1049 =item {se} : Northern Sami1050 1051 eq Lappish. eq Lapp. eq (Northern) Saami.1052 1053 1176 =item {no} : Norwegian 1054 1177 … … 1057 1180 =item {nb} : Norwegian Bokmal 1058 1181 1059 eq BokmE<aring>l, (A form of Norwegian.) (Formerly no-bok.)1182 eq BokmE<aring>l, (A form of Norwegian.) (Formerly "no-bok".) 1060 1183 1061 1184 =for etc … … 1064 1187 =item {nn} : Norwegian Nynorsk 1065 1188 1066 (A form of Norwegian.) (Formerly no-nyn.)1189 (A form of Norwegian.) (Formerly "no-nyn".) 1067 1190 1068 1191 =for etc … … 1083 1206 eq ProvenE<ccedil>al, eq Provencal 1084 1207 1085 =item {oj i} : Ojibwa1086 1087 eq Ojibwe. 1208 =item {oj} : Ojibwa 1209 1210 eq Ojibwe. (Formerly "oji".) 1088 1211 1089 1212 =item {or} : Oriya … … 1203 1326 NOT Aramaic! 1204 1327 1328 =item {se} : Northern Sami 1329 1330 eq Lappish. eq Lapp. eq (Northern) Saami. 1331 1332 =item {sma} : Southern Sami 1333 1334 =item {smn} : Inari Sami 1335 1336 =item {smj} : Lule Sami 1337 1338 =item {sms} : Skolt Sami 1339 1205 1340 =item [{smi} : Sami languages (Other)] 1206 1341 … … 1234 1369 1235 1370 eq Serb. NOT Sorbian. 1371 1372 Notable forms: 1373 {sr-Cyrl} : Serbian in Cyrillic script; 1374 {sr-Latn} : Serbian in Latin script. 1236 1375 1237 1376 =item {srr} : Serer … … 1250 1389 {sgn-ni} Nicaraguan Sign Language (ISN); 1251 1390 {sgn-us} American Sign Language (ASL). 1391 1392 (And so on with other country codes as the subtag.) 1252 1393 1253 1394 =item {bla} : Siksika … … 1423 1564 =item {tum} : Tumbuka 1424 1565 1566 =item [{tup} : Tupi languages] 1567 1425 1568 =item {tr} : Turkish 1426 1569 … … 1431 1574 (Typically in Arabic script) (Historical) 1432 1575 1576 =item {crh} : Crimean Turkish 1577 1578 eq Crimean Tatar 1579 1433 1580 =item {tk} : Turkmen 1434 1581 … … 1443 1590 =item {tw} : Twi 1444 1591 1592 =item {udm} : Udmurt 1593 1445 1594 =item {uga} : Ugaritic 1446 1595 … … 1463 1612 eq E<Ouml>zbek 1464 1613 1614 Notable forms: 1615 {uz-Cyrl} Uzbek in Cyrillic script; 1616 {uz-Latn} Uzbek in Latin script. 1617 1465 1618 =item {vai} : Vai 1466 1619 1467 =item {ve n} : Venda1468 1469 NOT Wendish! NOT Wend! NOT Avestan! 1620 =item {ve} : Venda 1621 1622 NOT Wendish! NOT Wend! NOT Avestan! (Formerly "ven".) 1470 1623 1471 1624 =item {vi} : Vietnamese … … 1482 1635 1483 1636 =item [{wak} : Wakashan languages] 1637 1638 =item {wa} : Walloon 1484 1639 1485 1640 =item {wal} : Walamo … … 1518 1673 eq Yap 1519 1674 1675 =item {ii} : Sichuan Yi 1676 1520 1677 =item {yi} : Yiddish 1521 1678 1522 Formerly "ji". Sometimes in Roman script, sometimesin Hebrew script.1523 1524 =for etc 1525 { ji} Yiddish (old tag)1679 Formerly "ji". Usually in Hebrew script. 1680 1681 Notable forms: 1682 {yi-latn} Yiddish in Latin script 1526 1683 1527 1684 =item {yo} : Yoruba … … 1559 1716 =head1 COPYRIGHT AND DISCLAIMER 1560 1717 1561 Copyright (c) 2001 ,2002Sean M. Burke. All rights reserved.1718 Copyright (c) 2001+ Sean M. Burke. All rights reserved. 1562 1719 1563 1720 You can redistribute and/or
