You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/10/16 05:55:33 UTC
svn commit: r1184754 [33/33] - in /lucene/dev/trunk: dev-tools/eclipse/
solr/ solr/contrib/langid/ solr/contrib/langid/lib/
solr/contrib/langid/src/java/org/apache/solr/update/processor/
solr/contrib/langid/src/resources/ solr/contrib/langid/src/resour...
Added: lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/zh-tw
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/zh-tw?rev=1184754&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/zh-tw (added)
+++ lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/zh-tw Sun Oct 16 03:55:30 2011
@@ -0,0 +1 @@
+{"freq":{"·":11773,"é":695,"åï¼å¸¸":664,"и":659,"о":642,"а":705," ã":2780," ã":1867," ã":1053,"ã":2229,"ã":93215,"ã":80530,"ã":12775,"ã":12789,"ã":16776,"ã":16978,"ã¢":2133,"ä¹":2518,"ä¹":8240,"ä¹":27365,"ä¹":17356,"ä¹
":750,"主":15490,"丼":1230,"ä¸":47219,"並":6623,"ä¸":85837,"ä¸":96789,"ä¸":9230,"ä¸":143774,"ä¸":7943,"ä¸":15307,"ä¸":7965,"ä¸":69580,"ä¸":51222,"ä¸":112603,"ä¸":1691,"份":8990,"ä»»":5556,"以":21889,"令":1163,"代":10588,"ä»":7458,"ä»":669,"ä»":884,"ä»":2840,"人":38053,"亡":666,"亦":2741,"交":18627,"京":3334,"äº":3430,"äº":2759,"äº":10406,"äº":8549,"äº":591,"äº":19252,"äº":6375,"使":59515,"ä½":1174,"ä½":1250,"ä½":13512,"ä½":4206,"ä½":14816,"ä½":16276,"ä½":33991,"伯":2485,"ä¼":2316,"�
�¼":1149,"ä¼":48236,"ä¿¡":7776,"ä¿":2093,"ä¿":1210,"ä¿":3393,"ä¾":9528,"ä¾":2206," å":709,"å®":3071,"å":1448," å
¬":2117,"å":3624,"å¡":7098,"å¥":1699,"å":4392,"å":23261,"å¨":1446,"å¼":664,"å½":2330,"å«":1432,"å":20323,"å":8443,"å":2089,"å":7724," å«":1516,"å":3613,"å":3031,"å":2612,"å":5349,"å·":1078,"å¹":3458,"åº":6608," å":633,"å":721,"å¦":1100,"å¡":2688,"å":4364,"å":2148,"å":2775,"å":2763,"å":50470,"å°":26714,"å¨":35708,"å":1107,"å":5909,"å ":6124,"å´":592," å":1513,"å¨":3287,"å°":646,"å":1221," ä¸":2854," ä¸":3070," ä¸":1592," ä¸":4439," ä¸":917," ä¸":3186,"åª":845,"å
±":7054,"å
¶":12215,"å
·":2485,"å
¥":4421,"å
§":8167,"å
©":4796,"å
¨":7445,"å
«":1932,"å
":2118,"å
¬":17482,"å
":1569,
"å
":4231,"å
":7487,"å
":2846,"å
":2097,"å
":625,"å³":4645,"å¹":1006,"å«":51626,"å":19430,"å":2429,"å":661,"å":6791,"å»":1179,"å":2509,"å":2465,"å":2477,"å":17959,"å":732,"å":5332,"å":2854,"å¸":7145,"å°":11564,"å¯":7754,"åª":1856," å":2397,"å£":4174,"å":7454,"å
":4825," ä½":611," ä½":594,"å":15370,"å":2646," 使":1198,"å":1769,"å":830,"å":1228,"å":5341,"å":18522,"å":2039,"å·":655,"å°":3241,"å³":2842,"å¡":3151,"å":2056,"å":3028,"å":4955," 人":1795,"å©":1406,"å ":7145,"å":44204,"å":5038,"å":10173," ä¼":691,"å":15261," ä¸":1418,"å":2396,"å¥":3548,"å©":8617,"å°":6362,"å¶":3819," ä¹":601,"å":3204,"å":10854,"å¯":861,"åµ":4236," 大":752,"å·¥":9304,"å·²":3034,"å·´":5247,"å·":7520,"
å·":3027,"å±±":12644,"屬":10111,"å±":3787,"å±":1099,"å±":2380,"å±
":1946,"å°¼":4905,"å°±":2895,"å°":3522,"å°":2442,"å°":12131,"å°":25320,"å°":2988,"å°":3315,"å°":4225,"島":4996," å¤":609,"岸":1457,"岩":955,"å½":1036,"å½¢":4081,"å½¹":939,"å½±":4543,"å¼":7142,"å¼":4806,"å¼µ":2155,"å¼·":1613," å°":712,"念":1686,"å¾":1561,"å¾":1728,"å¾":11603,"å¾":4825,"å¾":3819,"å¾·":8443,"å¹¹":610,"å¹´":47473,"å¹³":5943,"å¸":2671,"å¸":627,"å¸":2358,"å¸":6774,"常":6175,"帶":2450,"師":2700,"å¸":1237,"延":769,"建":8865,"廣":7680,"å» ":829,"åº":1070,"åº":1431,"康":1164,"度":6845,"座":2605,"大":34205,"央":1335,"天":6785,"夫":2310,"太":3614,"失":939,"å¤":5866,"å¤":11013,"奧":3348,"女":3636,"好":1255,"å¦":3939,"å§":3424,"å§":2437,"å ´":
5840,"å ±":2620," å":619,"å¢":971,"å¢":2254," å¨":1006,"å":9582,"å":2418,"å¸":28894,"å®":5183,"å®":4320,"å®":6314,"å®":2121,"å®®":11665,"客":2255,"宣":1095,"家":33259,"å¯":1263,"å¯":1751,"å¯":902,"寫":2568,"實":4042,"å¨":2163,"åª":887,"æ´":2088,"æ²":2693,"æ¾":3297,"æ¸":4421,"æ¯":61135,"æ ":606,"æ¥":891,"æ":4383,"æ":1898,"æ®":2726,"æº":821,"æ":14789,"æ":728,"æ¶":926,"æ":7015,"æ":2193,"æ¥":1544,"æª":4199,"æ¨":1871,"æ¬":13516,"æ":1009,"æ":3760,"æ":7068,"æ":950,"æ":15722,"æ":25006,"æ":2638,"æ":10258,"æ":16872,"æ¾":993,"æ±":12692,"æ":1739,"æ":1578,"æ":2488,"æ":1563,"æ":2801,"·ä¸":953,"·ä¸":684,"·ä¸":742,"æ":1908,"æ":14926,"æ¼":85512,"æ½":1293,"æ¯":13761,"æ":958,"æ
":1333,"æ
":3698," æ":592,"æ¥":21067,"æ©":2137,"æ¾":2161,"æ¹":3194,"æ¶":1971,"æ¯":2879,"æ":9547,"æ
":1906,"æ¸":7013,"æ´":1506,"æ¬":3403,"æ":7094,"æ":3534,"æ":2726,"æ":915,"æ":1689,"æ¿":1001,"æ¹":886,"æ":11497,"æ":3280," å¹´":38814," å¹³":1137,"æ":1356,"æ°":6668,"æ²":2673,"æ¿":1031,"æ":858,"æ":16686,"æ":10999,"æ":4217,"æ¨":2160,"æ§":1213,"æ¥":3506,"æ":1256,"æ":2131,"æ":5892,"æ
":1951,"æ¯":1210,"æ":3488,"æ":1000,"æ":2219,"游":601,"測":1417,"港":10903,"æ¸
":2633,"æ¹":3859,"æº":2955,"滿":823,"æ¼¢":2629,"æ¼":3758,"æ¾³":2170,"æ¿":2246,"æ°":10942,"æ°´":5379,"æ°£":1960,"æ±":5858,"æ±":1019,"決":1728,"æ²":1469,"æ²":2360,"æ²³":5480,"æ²¹":1041,"æ²»":5045,"æ³¢":2725,"æ´¾":1934,"æ´»":2827,"æ´²":5278,"æµ":4541,"æ¶":1
003,"æ·±":1422,"æ©":7988,"æ¨":1698,"æ¨":3283,"æ¨":5292," æ¯":2270," æ":13683,"æ¦":2208,"æ¤":5462,"æ¢":1051,"æ£":4102,"æ·":3537,"æ²":679,"æ»":6673,"æ":2412,"æ":2776,"次":4908,"款":922,"æ¬":2775,"æ¯":1810,"æ¯":4792,"æ¯":761,"殺":3118,"æ¡":1653,"æ ½":2979,"æ ¹":2237,"æ ¸":1380,"æ ¡":3603,"æ¢":3385,"æ¥":679,"æ¥":8348,"æ¤":5169,"æ§":3354," æ¥":10789,"æ¦":795,"ç³":3331,"ç¥":2143,"ç":5241,"ç¾":1402,"ç":2166,"ç":145617,"ç®":9288,"ç´":3002,"ç¼":12643,"ç½":2538,"ç¾":1845,"ç»":1101,"ç
":1342,"çº":605,"ç¸":897,"ç°":1992,"ç±":17477,"ç¨":13939,"ç¢":5389,"ç":19162,"ç":1400,"ç¶":5199,"ç«":2821,"ç¥":871,"ç":781,"ç":6587,"ç°":2430,"ç":9565,"ç":7020,"åï¼ä¸":683,"ç¾":8423,"ç":1762,"ç":5328,"ç":2259,"ç�
�":1989,"ç":2524,"ç":1136,"ç©":13950,"ç¹":9797,"ç":2331,"ç¾":10487,"ç":2531,"ç
§":1001,"ç¶":2724,"ç¡":3430,"ç£":8336,"ç«":1815,"ç½®":1766,"ç½²":979,"ç¾
":6361,"ç¾":10443,"群":2244,"義":5695,"ç¿":841,"總":5592,"縣":5350,"ç¹¼":1038,"çº":1407,"ç´¢":1368,"ç´ ":1525,"ç´":2465,"ç´":5697,"ç´":4111,"ç´":4056,"çµ±":6648,"çµ":5814,"çµ":3076,"ç¶ ":801,"ç¶":3676,"網":4429,"ç¶":8789,"ç¶":642,"ç·¨":2471,"ç·":7038,"ç°¡":4320,"ç±³":9228,"ç³»":6773,"ç":12258,"ç":935,"第":11129,"ç¯":721,"ç¯":2370,"ç®":1983,"ç©":2283,"究":2927,"空":4143," ç":1486,"ç¨":4595,"稱":17006,"種":13270,"ç«":6147,"ç«":9659,"競":659,"ç«¥":907,"çª":731,"票":889,"ç¥":3459,"åï¼å¤":1245,"社":4421,"示":1588,"移":1020,"ç§":736,"ç§":10232,"ç¦":3334,"ç
´":649,"è¦":8322,"è¦":2224,"è¦":4488,"親":1395,"è§":2300,"è§":3003,"解":2370,"西":17796,"被":7902,"製":3213," è¬":1397,"è¡":1641,"è¡":2230,"è¡":4294,"è¡":14995,"表":5436,"è®":2770,"è¯":3201,"è¦":737,"è°":3043,"è·":1664,"è":1490,"調":1479,"說":3940,"èª":12000,"èª":2740,"è«":3370," è":1554,"è¨":6262,"è¨":2497,"è¨":5475,"è¨":1564,"è¨":2973,"該":4761,"話":1875,"è©":1093,"è·¯":11367,"è¶":2102,"è¶
":1905,"足":2739,"èµ·":4291,"è³":786,"è³":4396,"è³½":6003,"質":2688,"è²»":1408,"貨":906,"責":1207,"象":1864,"è°·":1752,"è¿":3107,"辦":2818,"è¾²":1197,"è½":1925,"è¼":2015,"è¼":1405,"è»":6562,"身":2869,"è´":1208,"è³":9837,"èª":8751,"è":9484,"è
":7426,"è":13745,"è":2663," ç±³":6617,"è¯":14082,"è²":1069,"è
":1122,"è¡":
1263,"è²":2954,"è½":5599,"è":3974,"è²":888,"è¯":6888,"è¬":2564,"èª":2033,"è":1795,"è":3075,"è":14328,"è¦":940,"è¾":758,"è²":3577,"è±":3113,"è±":9681,"è":2269,"è¥":973,"è":1953,"è":4637,"è¬":2757,"è½":1486,"è":673,"è":4121,"è":5640,"è":5557,"è":3088," è±":759,"風":2494,"é£":1389,"é£":1667,"é ":2927,"é ":955,"é
":2370,"é¡":5147,"馬":6983,"é¦":9518,"館":2367,"é«":9444,"é«":8889," é¿":586," é¦":621,"é»":3460,"é»":1831,"é»":2289,"黨":2390,"é¾":3877,"é£":2837,"é ":2926,"é²":5235,"é":620,"é":6954,"é":7591,"é":2179,"éº":895,"é¸":3181,"é":6349,"é":6466,"é":3024,"é":4169,"é":1604,"é":2782,"é¡":912,"é¨":14529,"é½":5256,"é
":1052,"é«":1928,"é":5699,"é":1096,"é":3732,"é":6615,"é":55
91,"é":1423,"é":1696,"é®":2372,"éµ":4592,"é":8271,"é":7281,"é":4672,"é":626,"é":2369,"é¢":5168,"é¤":1568,"é³":1215,"é¸":5523,"é½":2042,"é²":1167,"é¿":4360,"é¢":1505,"é£":627,"é»":10735,"é²":2700,"é":3986,"é":4571,"é":2396,"é":3371,"é¢":6432,"é":1196,"é¿":1404,"é³":4823,"ê°":589,"ï¼":53309,"ï¼":53597,"ï¼":1381,"ï¼":211682,"ï¼":15729,"ï¼":5850," ï¼":1546," ï¼":2103," ï¼":4859,"è»ç«ï¼":720,"åçç¹":944,"æ¤ç©ã":2875,"æ¤ç©ï¼":1199,"åºä¸æ":662,"çå°ï¼":2994,"æ°å
±å":1261,"ã ":3480,"ã ":1427,"ã ":591,"ã ":624,"å大é¸":3816,"å°åï¼":3519,"ãé":1771,"ãé²":1235,"ãç":1264,"ãç¦":680,"ãç¾":601,"ãç":835,"ãç±":929,"ã西":1055,"ã
該":1503,"åå·ã":1228,"ã廣":1717,"ãå±±":2030,"ãå®":622,"ãå®":1379,"ãå":1327,"ãå¨":1916,"ãå°":622,"ãå":2945,"ãå":792,"ãå°":867,"ãå«":1945,"ãå
¶":1173,"ãæ¹":1562,"ãæ²³":1284,"ãæ±":1104,"ãæ¤":759,"ãæ¯":802,"ãæ¥":634,"ãã":1707,"ãã":1196,"ãã":974,"ãã":1063,"ãã":966,"ãã":2150,"ãä¸":594,"ãä¸":727,"ãä¸":592,"ãä¸":880,"ãä¼":613,"ãä»":1366,"ãä¸":1131,"ãä¸":588,"ãä¹":718,"ã人":863,"ãä¸":3044,"ãä¸":930,"ãä¸":3444,"ãä¸":1334,"ãä¸":2563,"ãä¸":4244,"ãä¹":1329,"ãä¸":586,"ãä¸":1165,"ãä¸":959,"ãä¸":1017,"ã¢ã¢":1643,"å°åç":687,"ãï¼":1271,"
ãï¼":3388,"ãï¼":800,"ãï¼":2216,"ãï¼":1818,"å¨ä¸å":1179,"ç«æ¼ ":1061,"æ ½å¹ã":2896,"æ¯é¦æ¸¯":1720,"ä¼ ":665,"使 ":756,"ä¸ã":2094,"ä¸ã":739,"ä¸ã":981,"ä¸ã":585,"ä¸ã":2906,"ä¸ã":2027,"ä¸ã":2136,"ä¸ã":1921,"ä¸ã":1804,"ä¸ã":1947,"ä¸ã":1850,"ä¸ã":693,"ä¸ã":1789,"ä¸ã":2245,"ä¸ã":800,"人ã":1182,"äºã":826,"交ã":660,"äºã":852,"å« ":745,"ä¸å±±":729,"ä¸å±¬":937,"ä¸å®¶":1629,"ä¸å°":1088,"ä¸å®¶":1181,"ä¸å±¬":1243,"ä¸å":13565,"ä¸å¸":945,"ä¸å":859,"ä¸å®¶":831,"ä¸å¤§":655,"ä¹å«":1335,"ä¸å¤§":605,"ä¸å°":870,"ä¸å¤§":588,"ä¹ä¸":705,"ä¹ä¸":736,"ä½ã":1221,"ä¹ä¸":663,"ä½ã":761,"ä¸å�
�°":748,"ä¸å":584,"ä¹ä¸":4708,"ä¸å":1032,"ä¸å":850,"ä¸å":1045,"ä¸å":615,"ä¸å":2051,"ä¸å":723,"ä¸ä½¿":2667,"ä¸å":1235,"ä¸ä¸":646,"ä¸ä½¿":1157,"ä¸å«":1278,"ä¸å":1514,"ä¸äºº":604,"ä¸å
":1050,"ä¸å
¶":690,"ä¸ä½¿":877,"ä¸å":758,"ä¸ä½":1297,"ä¸å
¬":954,"ä¸å":1850,"並ä¸":839,"ä¸å«":1815,"ä¸ä¼":1067,"ä¸å«":1781,"ä¸ä½¿":1931,"ä¼ã":1287,"ä¼ã":1097,"ä¸å
":614,"ä¸ä¸":3784,"ä¸ä¸":839,"ä¸ä¸":2627,"ä¸ä¸":3712,"ä¸ä¼":1238,"ä¸ä¸":2873,"ä¸ä½":865,"ä¸äº¤":3733,"ä¸äº¬":1465,"ä¸äº":953,"ä¸ä¼":1744,"ä¸å":9748,"ä¸ä½¿":1114,"ä¸ä½":1124,"ä¸å«":2045,"ä¸ä¸":2819,"ä¸ä¸":4439,"ä¸ä¼":656,"ä¸ä¸":779,"ä¸ä¸":49
29,"ä¸ä¸":2910,"ä¸ä¹":689,"ä¸ä¸":986,"ä¸ä¸":10533,"ä¸ä¸":5787,"ä¸ä¸":4591,"ä¸ä¼":2124,"ä¸ä¹":678,"ä¸ä¸":1486,"ä¸ä½":1048,"ä¸ä¹":1046,"ä¸ä½":630,"ä¸äº":641,"ä¸ä½¿":3115,"ä¸ä¸":1385,"ä¸ä¸":1326,"ä¸ä¸":1047,"ä¸ä¸":1140,"ä¸ä¸":1080,"ä¸ä¸":689,"ä¸ä¸":3008,"ä¸ä¸":3158,"ä¸ä¸":5062,"ä¸ä¸":3771,"ä¸ä¸":1124,"ä¸ä¹":842,"ä¸äº":1051,"份ã":614,"ä¸ä¸":1125,"ä¸ä¸":5951,"ä¸æ¨":705,"主æ":1120,"ä»»ä½":653,"ä¸æ¥":742,"ä¸æ":1114,"人å£":1812,"ä¸æ¬¡":950,"ä¸æ":808,"ä¸æ¢":1013,"ä¹å¾":1287,"ç¨®æ ½å¹":2894,"ä¸æ¼":989,"ä»å":991,"äºå ´":701,"ä¸æ¼":627,"ä¸æ¼":1045,"人使":893,"ä¸æ¯":707,"ä¸æ¯":753,"ä¸æ":3356
,"äºå":705,"人ä¸":915,"人ä¸":695,"主å¸":690,"ä¸æ":802,"ä¸æ¯":911,"ä¸æ¼":991,"ä¸æ":610,"ä¸æ":718,"交ä¼":759,"ä¹å¤§":1193,"äºå":628,"ä¸å¼":1114,"ä¸å¸":1267,"交ä¸":718,"使ã":1730,"使ã":1391,"ä¸åº":631,"ä¸åº§":888,"ä¸å¤®":1281,"ä¸å°":677,"ä¸å±¬":955,"äºäº¤":1241,"ä¸å®¶":1096,"ä¸å¯«":642,"ä½å":651,"ä¸ç":2868,"ä¼å¦":836,"ä¸ç§":1482,"ä¸ç¨®":4747,"ä¸ç":2226,"份年":660,"使ä¼":786,"使代":599,"使ä¸":614,"äºæ¬¡":594,"使ä¸":1261,"使ä¸":902,"使ä¸":966,"使ä¸":904,"使ä¸":680,"ä¸ç":914,"ä¸ç":3842,"ä¸ç":1685,"ä¸ç":3553,"ä¹æ±":1087,"ä½ä¸":598,"ä½ä¸":890,"ä¸ç":2951,"ä½ä¸":741,"ä½ä¸":1040,
"ä½ä¸":1316,"ä¸ç":995,"交æ¼":631,"ä¸ç":2516,"ä¸ç":1824,"å«ã":904,"å«ã":1101,"ä¸ç¾":927,"ä¸ç¶":602,"ä¸ç¾":1156,"ä¹æ":714,"ä¸ç¾":1191,"人工":3084,"ä¸ç":618,"ä¼ä¸":1588,"ä¼ä¸":1289,"ä¼ä¸":1088,"ä¼ä¸":1470,"以å":5705,"ä¸æ²»":1653,"ä¼å«":752,"ä¹æ¯":2445,"ä¹æ¯":612,"ä¼ä½":739,"ä¼ä¼":1180,"ä¼ä½¿":949,"ä¸èª":647,"交ç":662,"ä½æ":648,"ä¸è¥¿":1266,"ä½æ¯":741,"人ç":1304,"ä¸èª":843,"主義":1789,"ä¸è":617,"äºç":883,"人ç©":959,"ä¼æ¥":965,"ä¸è¥¿":590,"ä¸è¯":3016,"ä¸è¬":2431,"ä¸è¯":718,"人æ°":2356,"ä¸è³":635,"ä¸è":589,"使å":1465,"å ":869,"ä½å®¶":983,"ä½å":1437,"使å«":938,"ä¸ç´":2133,"
äºæ´²":978,"ä¸è¯":1080,"ä¹ç":932,"å° ":1125,"ä¸ ":1327,"ä¸ ":1248,"稱æ¼ã":1031,"ä¸ ":1426,"ä¸Â·":665,"ä¸ ":1214,"ä¸Â·":957,"å°ã":843,"åæ¥":792,"åã":913,"å«ï¼":2201,"åè¯":655,"åå®®":915,"å®ä½":908,"å¡æ":1240,"å¯è½":966,"åé¨":1208,"å稱":1366,"åè":743,"å種":710,"åç":1486,"åç":917,"å稱":2281,"使ï¼":775,"使ï¼":3019,"ä½ï¼":692,"ä½ï¼":2446,"å°ç£":5826,"ä½ï¼":895,"ä½ï¼":878,"åªæ":589,"åç":606,"ä¼ï¼":898,"ä¼ï¼":2604,"åæ":1361,"åå":2079,"åæ¼":1540,"å¨ ":4228,"å½å":845,"åä¸":658,"åä¸":617,"åç":1565,"åµè¾¦":644,"åä¸":1045,"åä¸":617,"å身":709,"交ï¼":1096,"人ï¼":2364,"å
æ":687,"åé¡":693,"åç":604,"äºï¼":1473,"åç«":1122,"åç":742,"åäº":1733,"å æ¤":1472,"åæ°":1001,"åºä¸":1219,"åºä¸":778,"å æ¼":1311,"å¤çè":623,"åï¼":730,"å大":4188,"åå·":1895,"å°å":6934,"åå":1282,"å¨å°":591,"å家":4180," å
¬é":1210,"åä¸":723,"åä¸":683,"åä¸":1346,"å人":979,"å
§ï¼":588,"å¨ä¸":590,"å¨ä¸":766,"å¨ä¸":1169,"å¨ä¸":857,"å°ä¸":1518,"å¨ä¸":1499,"å¹ã":2897,"ä¿è·":897,"å
¬å":1063,"å
¨å":1075,"å
±å":2094,"å
±å":713,"å
¬å¸":6083,"å ":817,"åä¸":734,"åã":848,"åã":713,"使è":928,"ä½è²¬":846,"å«ç¾":803,"å ":587,"ä¾èª":904,"人é¡":938,"å
¶ä»":1807,"å
Œ
±":721,"å
«ç¨":717,"å
§ä¿¡":886,"å
¶ä¸":1932,"å
©å":962,"å«ç":1758,"ä¿ç¾
":1193,"å
§ä¸":817,"交é":965,"代表":1917,"使ç¨":3276,"使ç":2623,"ä¾ç":817,"使ç":2298,"ä½ç":820,"ä½ç":1260,"ä½ç¨":690,"ä¹é":1600,"å«å·":1179,"ä½ç":823,"ä½è½":735,"å«æ":1307,"ä¹é¾":820,"å«æ¯":769,"使究":2861,"å«ä½":704,"ä¸é¨":1510,"å«ä¼":862,"代ç":605,"å«å«":983,"å«å
":676,"使æ¼":674,"使æ¬":654,"ä½æ¼":6989,"å人":1069,"ä¸è¿":678,"ä½æ¼":1594,"ä¸è»":636,"主è¦":4266,"ä¸é¨":1589,"ä»ç":940,"å«ä¸":1719,"å«ä¸":1808,"å«ä¸":1219,"å«ä¸":1126,"å«ä¸":896,"ä¿¡æ¯":690,"ä¸é«":712,"å«å¤":1308,"å«å":1159,"ä¼ç«":658,"人è":943,"
ä¼ç":1677,"ä¸é¢":1013,"åä¸":747,"åä¸":2170,"ä¸ï¼":1556,"ä¸ï¼":3869,"ä¸ï¼":946,"ä¸ï¼":1460,"ä¸ï¼":2498,"ä¸ï¼":5085,"å島":716,"ä¸ï¼":1457,"ä¸ï¼":2375,"ä¸ï¼":1026,"ä¸ï¼":3944,"ä¸ï¼":1688,"å
æ¬":3326,"åå ":686,"å¡ã":603,"ä¸ï¼":3370,"åå
¶":661,"åµç«":812,"åå":722,"ä¸ï¼":4229,"ä¸ï¼":1428,"ä¹ï¼":704,"åç©":1355,"åå":874,"åç£":831,"å°åº¦":2180,"å¯ä»¥":2687,"å°ä¸":1162,"åç":1695,"åæ":1036,"å使":755,"åç«":650,"åä½":595,"å
å«":733,"åä¸":932,"å
¬è·¯":1194,"åå¸":839,"æ¼é¦æ¸¯":1347,"å©ç¨":694," ä¸ç´":1443,"åå":760,"å
¬é":2070,"å
§ç":698,"å«é«":1121,"åå°":2709,"åæ¼":856,"å�
�ä¸":1027,"åä¼":691,"åä»»":888,"åä¸":751,"åä¸":1379,"åä¸":1206,"åä¸":1033,"åå«":1202,"åä½":918,"åã":909,"åå¥":960,"å©äº":1460,"å³çµ±":899,"å
±æ":696,"å
·æ":1054,"åã":805,"åã":855,"åã":2848,"åµä½":724,"åå¸":3537,"å
¨ç":780,"å ä¹":907,"å ´ï¼":657,"å·äº":599,"å·¥ä½":1351,"å·´ä¹":819,"å¸é¢":1919,"屬ç":3350,"度ã":646,"å¸å":1099,"å·¥å¼":2892,"å¹³ä¸":1295,"山谷":592,"年代":1818,"å¸å¨":1588,"山西":699,"å°é½":651,"實é":604,"建ã":593,"å°èªª":1488,"巴使":882,"å¸ä¸":1033,"å°æª":2823,"å®ç":781,"å·ã":1207,"å°ï¼":4121,"å·ã":1254,"ãé²å":1181,"å°æ¼":1252,"å®ç":613,"å¸ç�
�":914,"å±±å¡":1674,"åï¼":702,"å¸ç":932,"å°ç":1375,"å±±æ±":747,"屬æ¼":2028,"德·":729,"å®ç¾©":598,"åªé«":593,"å°±æ¯":946,"家ç":1003," å¹´ ":13455,"å½¢å¼":845,"å½¢æ":781,"å¾·å":1532,"å¾ä¾":987,"廣æ":596,"廣æ±":1877,"建æ¼":727,"å¹´ç":1359,"建ç«":1105,"å¼ç¨®":2905,"å¹´è³":809,"æ ":3138,"廣ä¹":652,"å¤ï¼":1036,"å·¥æ¥":864,"å¸æ¼":1775,"建ä½":1764,"å¹´å¨":773,"廣å·":765,"å·¥ç¨":1106,"å·²ç¶":809,"常ç":722,"å»£å ´":674,"åé«":692,"大å¸":4750,"åé":3079,"åï¼":4017,"åï¼":805,"大æ°":639,"å¸ï¼":903,"å¸ï¼":707,"太平":715,"å§å¡":1628,"å¤ç":1260,"åï¼":770,"åï¼":1822,"åç«":626,"åç":3067,"åºæ¬":664,"
å¢å
§":659,"åï¼":4922,"大å©":1318,"大使":1011,"家 ":671,"天ä¼":592,"天主":779,"大ä¸":858,"大ä¸":898,"å¸ä¸":593,"大é¸":4384,"å¸ä½¿":615,"å®ä¸":874,"åå¨":831,"å¸å":3781,"家ä¸":660,"家ä¸":836,"å¸å®¶":1871,"島ã":590,"å¹´ ":13925,"å°ä¸":726,"å°ä¸":935,"家å":1674,"å°å«":637,"å°ä¿":1024,"å°¼äº":1180,"å¸æ ¡":1548,"å®æ¯":687,"家å¾":639,"å®æ":606,"å±±ä¸":856,"大ç":1820,"åï¼":659,"å¸ã":1095,"æªç±äºº":2697,"å¡ï¼":1301,"太空":651,"家ã":1074,"家ã":1302,"大é¨":622,"æ±ä¸":1591,"æ¯ç±":1946,"æå°":649,"æ¼ç¾":890,"æ大":1529,"æï¼":1008,"ææ©":863,"æ¯ç¾":1276,"æ¥è³":621,"æå¾":917,"æç":620,"�
�å®®":1146,"æä¸":937,"æä¸":865,"æä¸":744,"æä¸":654,"æ±å":851,"æ¯æ¼":666,"æ¯æ¥":750,"æä¸":682,"æè²":1764,"æ¯ç¹":828,"æ¯æ":1896,"æå":1801," æ¥ ":1059,"ææ":1763,"æä¸":1032,"æä¸":631,"æ ½å¹":2950,"æé":1470," æ ":10453,"æç":813,"æªç±":2698,"æç":742,"æ±ç":644,"æ¯é¦":1731," å¹´ï¼":2603," å¹´ï¼":2399,"ææ¤":939,"ææ":596,"æç":752,"æ¼é¦":1366,"æ
äº":841,"å¾ï¼":2045,"æä¸":618,"æä¼":765,"æ使":808,"ãæ¹ä¸":767,"æ¯ä¸":773,"æå":2109,"ãæ±è¥¿":640,"æ¼ä¸":1931,"æ¼ä¸":1279,"æ¼ä¸":3104,"æ¼ä¸":3971,"æ¼ä¸":2015,"æ¼ä¸":1568,"æ¼ä¸":3228,"æ¼ä¸»":1138,"å¼ï¼":713,"æ¼ã":3160,"æ¯�
�":751,"æ¯å°":621,"æ代":1179,"æ¯å¨":1454,"æ±ã":1223,"æ¼æ¯":782,"æ¼æ¥":1024,"æ©æ":592,"æ¥æ¬":5548,"æ¼å«":790,"ãæ¹å":720,"åï¼ï¼":3240,"æ¼äº":898,"æ¼äºº":919,"æ¯å¦":674,"æå¸":1074,"æ¼å°":1510,"æ¸å¸":761,"æ¼å":777,"æ¸æ":647,"æ¼å±±":668,"æ¯ä½":626,"æ¯ä¸":1220,"æ¯ä¸":853,"æ¯ä¸":2852,"æ¥å¨":676,"æ¯ä¸":1400,"æ¯ä¸":8457,"æ¯ä¸":724,"æ¯ä¸":2046,"ææ":604,"æ¯ ":1586,"æ¨ä¸":777," å¹´ç":915,"æ稱":642,"æç«":2635,"æ°ç":1266,"æç":1247,"æ¥ ":1136,"æ¼ ":12568,"æ¯Â·":1030,"ææ":1063,"æç¨":957,"æä¸":1113,"ææ¼":2501,"æè³":589,"æ ":10505,"æ ":1922,"æè¡":1647,"å¹´ï¼":1135,"å¹´ï¼":2513,"å¹´ï¼�
�":3020," å¹´è³":781,"æä¸":748,"æä¾":1573,"æ§å¶":697,"æè
":865,"å®®ï¼":689,"家ï¼":883,"家ï¼":2956,"å¸ï¼":695,"å¸ï¼":1273,"åï¼":930,"æã":673,"å¹´é":605,"廣西":1228," å¹´å¨":693,"屬ï¼":1280," 年代":1445," å¹³ä¸":1026,"æå¡":1192,"æ以":843,"æä½":668,"å½±é¿":1080,"å°ï¼":1059,"å§å¡æ":1211,"æµè¡":628,"æ¥ï¼":586," æ¥ï¼":775," æ¥ï¼":1367,"æºæ¼":682,"ç¾Â·":762,"æ²æ":1285,"æ´»å":1238,"æ¯è³½":1111,"æ±è":807,"æ±è¥¿":972,"æ¹ä¸":1117,"æ¹å":1294,"æ±ã":1010,"æ¯ï¼":607,"æï¼":2388,"æ©æ§":1217,"æ»å°":1361,"ã廣æ±":743,"æ¥ï¼":1544,"æ¥ï¼":813,"æ·ä½¿":2356," æ¥è³":616,"æ°å":1569,"ææ´²":1588,"æ°�
�¸»":899,"æ°å
±":1266,"æ£å¼":1274,"æï¼":1278,"æ²³å":853,"æï¼":734,"ã廣西":861,"æï¼":853,"æï¼":980,"æ°æ":1066,"æï¼":613,"æ²³ä¸":810,"æ ¹æ":1260,"æè°":766,"æé«":966,"æé":1013,"æé":1519,"æ¨ä½":1052,"æ¤ç©":4908," æ¯ä¸":681,"æ¦å¿µ":627," æ¥å¨":656,"çé»":999,"稱ã":1043,"ç西":633,"ç¼è¡":1127,"çé":753,"第 ":1807,"çç¼":596,"ç第":1621,"ç©ï¼":2112,"ç®ç":1040,"çæ¯":1255,"çæ":739,"çæ¤":2894,"ç´æ¥":612,"ç¼ç¾":996,"çç¹":1537,"ç© ":785,"ç¼ç":1316,"çå°":648,"çå°":1527,"ç家":632,"ç¼å±":2338,"ç大":1229,"çå":1195,"çå°":4227,"ç®å":4386,"çä¸":3310,"çä¸":9461,"ç¨ç":1238,"�
�ç":842,"ç¢ç":949,"çç¢":964,"çé":888,"çç©":1143,"çè":4477,"çè
":1049,"çå":796,"çå":1368,"çå«":1653,"çä¼":1719,"ç使":1916,"çä½":1046,"çä½":651,"ç主":1361,"çä¸":1197,"çä¸":3244,"çä¸":2994,"çä¸":5094,"çä¸":2456,"çä¹":747,"ç人":1899,"ç交":1021,"çæ¼":2502,"ç¨æ¼":1261,"ç±æ¼":1686,"çè«":1080,"ç¶æ":1405,"çæ´»":843,"çå¨":746,"ç¢å":771,"çã":900,"çã":764,"çã":1273,"ç°å¢":892,"ç±äºº":2997,"ç¨ä¾":777,"çå¸":1070,"ç¾å¨":1079,"çæ":657,"ç¾ä»£":897,"ç²å¾":1128,"ç ":1662,"ç¨ã":731,"çä¸":801,"ç©ç":701,"ç¹æ":1087,"ç¹å¥":1033,"æ¾³é":921,"ãç¦å»º":587,"ç©ã
":3616,"è¯å":1423,"èæ":666,"èå¨":1114,"çï¼":840,"èæ¼":3447,"è
ã":797,"網路":1657,"總統":750,"稱ï¼":608,"種ï¼":743,"ç¾å":6254,"æéå
¬":1342,"義大":928,"群島":661,"ç¾
æ¯":1391,"è¯äº":701,"ç¾æ´²":846,"ç«ï¼":1417,"ç«ï¼":605,"è¯ä¼":1628,"ç«ï¼":947,"ç¶æ¿":1788,"çµ±ç":590,"ç¶ç":801,"ãçè
":803,"ç·ç":712,"è³ ":5663,"èª ":699,"ç´å¿µ":700,"簡稱":3399,"çµæ":1456,"ãç±æ¼":638,"è ":661,"ç±³ç":3024,"çµæ§":812,"ç±³è³":2731,"系統":2899,"統治":615,"ç®ï¼":621,"çµä¸":586,"çµä½":2037,"ç´ä¸":598,"系使":1485,"ç¯ç®":923,"ç«æ¼":1349,"çå°":3334,"çï¼":1232,"天主æ":743,"ç«ç":1251,"çï¼":616
,"ç¨ï¼":772,"ç¨å¼":650,"ç§æ":622,"ç´ ":1720,"第ä¸":1132,"第ä¸":3441,"第äº":2120,"稱æ¼":4215,"ç¨®æ ½":2894,"ç§ä¸":1047,"çã":1259,"ç§å¸":1716,"ç¦å»º":1133,"社æ":2251,"ç«ã":627,"è§è²":606,"è¨ç®":1063,"è¨ç«":673,"西é¨":622,"製é ":737,"èªæ¼":1497,"西ç":773,"è¦ç":1104,"被稱":1048,"è¨å":875,"è¶å":953,"è³æ":957,"è³½äº":631,"èªè¨":1720,"è¨è¨":1777,"èï¼":798,"èï¼":614,"è¯æ¼":650,"è¶
é":651,"è·¯ç·":849,"足ç":1703,"è³è¨":664,"è¡ï¼":1162,"é«å¸":647,"é²è¡":2272,"é½æ¯":704,"éç¨":791,"é種":792,"é¨å":2342,"é¨ä»½":776,"éæ²":1924,"é£æ¥":662,"é常":1653,"éå":2496,"èªï¼":3929,"說ï¼":690
,"è»ç«":2410,"éå":1488,"éäº":923,"æ¼æ¥æ¬":917,"èªæ²»":728,"èªç¶":792,"è±å":2374,"èªç±":877,"è¬ç":678,"ç·ï¼":642,"èªç©º":1143,"èå°":752," ç±³ç":2958,"èªä¸":786,"ç¾
馬":1149,"èä¸":666," ç±³è³":2728,"ç±³ï¼":753,"è¯è³½":916,"è¯æ°":1238,"è±èª":2016,"è
ï¼":759,"èè¡":1293,"è±æ":2836,"è辦":651,"è¯äºº":1471,"èå":1732,"èè¯":599,"èç":911,"èè¡":1217,"è¡ã":771,"èï¼":628,"è¡æ¼":853,"製ä½":1162,"西å":747,"è¡ç":1255,"西份":1005,"西äº":1235,"西ä¸":1211,"è¡ä¸":2204,"èª ":675,"西ã":2735,"ä¸ï¼ç®":813,"æ¯ä½æ¼":586,"éï¼":1121,"æ¯ä¸ç¨®":2255,"é¦æ¸¯":8631,"é«ä¸":607,"é«ç":647,"éï¼�
�":588,"é«é":865,"é«è²":810,"åä¸å":2028,"å
¬éï¼":1100,"é«ï¼":795,"æ¯ä¸å":2235,"ä¸ï¼å¸":765,"æ¯ä¸å":2143,"æ¼å°ç£":1145,"é¸è":733,"éé":1270,"é¨ç":1066,"è³½ï¼":689,"è·¯ï¼":930,"éè¦":1746,"éè¡":879,"éµå®¶":726,"ç第ä¸":601,"éè¦ç":694,"éå§":2056,"é¨ï¼":1123,"é¤äº":587,"éå
¬":1343,"éµè·¯":1902,"é»å":1154,"é²å":1736,"é¿æ":770,"éç":1117,"éç¼":1451,"é¸ç":3080,"éï¼":1236,"é»å½±":2042,"é¿ç¾":603,"é»å":1071,"é¢ç©":1525,"éæ´²":681,"é å":708,"é
ç®":726,"é»è¦":2609,"é³æ¨":1761,"éè¦":625,"馬ä¾":712,"ææ¤ç©":938,"çç¹æ":970,"æ¯ç¾å":1183,"çæ¤ç©":2889,"æ¥è
³ ":606,"åå°æª":2703,"æ大ç":952,"æ¼ç¾å":799,"å ä¹å¤§":716,"æ¯æ¥æ¬":699,"ç®åå°":2707,"çå°å":3107,"çèå¨":1091,"çèæ¼":3237,"çè
ã":794,"çä¸é¨":774,"ç主è¦":668,"çä¸ç¨®":1474,"å¹´ï¼ï¼":737,"çä¸å":3094,"éå
¬å¸":1342,"å¹´ï¼ ":710,"æ¼ä¸æ":2919,"æ¼ä¸å":2310," ï¼æ¯":653,"åçå°":586,"ï¼ï¼":14128,"ï¼ï¼":3756,"ï¼ï¼":1173,"ï¼ï¼":1558,"ï¼ï¼":731,"ï¼ï¼":4068,"ç±äººå·¥":2898,"ï¼æ±":861,"ï¼æ":1487,"ï¼æ":1123,"ï¼æ¾":1063,"ï¼æ¯":13163,"ï¼æ¯":10839,"ï¼æ¼":6392,"ï¼æ¤":660,"ï¼å¾":1253,"ï¼å¾":882,"ï¼å¸¸":1047,"ï¼æ
":644,"ï¼æ¼":2446,"ï¼æ":584,"ï¼æ":993,"ï¼æ
":1331,"ï¼æ":1293,"ï¼ç":2203,"ï¼ç®":3631,"ï¼ç°¡":2031,"ï¼ç¬¬":778,"ï¼ç¾":1338,"ï¼ç±":3905,"ï¼ç¨":642,"ï¼ç":3524,"ï¼ç¶":880,"ï¼è¥¿":834,"ï¼è¢«":860,"ï¼è©²":1017,"ï¼ç¶":598,"ï¼ç¸½":744,"ï¼ç¾":597,"ï¼è":3222,"ï¼è±":973,"ï¼è":1255,"ï¼è±":2412,"ï¼é¦":721,"ï¼é":885,"ï¼é":1582,"ï¼ã":615,"ï¼ã":690,"ï¼ã":3711,"ï¼ã":2876,"ï¼å¸":3641,"ï¼å®":1379,"ï¼å±¬":841,"ï¼å°":787,"ï¼å°":650,"ï¼å ":2415,"ï¼å¨":4525,"ï¼å¤":1566,"ï¼å¤§":803,"ï¼å¦":966,"ï¼å":662,"ï¼å":861,"ï¼å":727,"ï¼å³":1028,"ï¼å":1219,"ï¼å
":1288,"ï¼å":952,"ï¼å":897,"ï¼å°":652,"ï¼å¯":1102,"ï¼å":2944,"ï¼å":1271,"ï¼ä¸":2771,"ï¼�
�ä¸":3583,"ï¼ä¸":2297,"ï¼ä¸¦":3993,"ï¼ä¸":1698,"ï¼ä¸»":1659,"ï¼ä»":666,"ï¼ä¹":1385,"ï¼ä¹":3255,"ï¼ä¸":751,"ï¼ä¸":5090,"ï¼ä¸":3996,"ï¼ä¸":2348,"ï¼ä¼":1371,"ï¼ä½":3353,"ï¼ä½":2751,"ï¼ä½":784,"ï¼ä½¿":1949,"ï¼äº¤":834,"ï¼äº¦":1195,"ï¼äºº":871,"ï¼ä»¥":3497,"ï¼ä»":1340,"ï¼ä»½":756,"ï¼å«":1535,"ï¼å
±":754,"ï¼å
¶":3556,"ï¼å
¨":1257,"ï¼ ":3933,"ï¼ ":996,"ï¼ ":7569,"ï¼ ":815,"é²åã":1086,"ä¸å±¬ï¼":787,"è¯æ°å":1215,"廣西ã":800,"ä¸åç":1325,"ä¸å大":3838,"ä¹ä¸ï¼":2265,"è±èªï¼":1339,"ä¸æ ":3137,"å¼ç¨®æ ½":2892,"ä¸å±¬ç":703,"人工å¼":2892," ï¼ï¼":596,"ä¹æ±ã":593,"ç¹ææ¤":935,"以åä¸":1990,"ä�
�è¬ç":678,"ä¸è¯äºº":1156,"è±æï¼":1510,"ä¸è¥¿ã":804,"ä¸è¯æ°":1237,"廣æ±ã":761,"è¯äººæ°":1156,"人æ°å
±":1261,"ï¼ï¼æ¼":1432,"ï¼ï¼æ¯":1898,"ï¼ï¼å":978,"ï¼ï¼æ¯":2411,"ï¼ï¼æ¯":1113,"ï¼ï¼ï¼":1004,"èåç":780,"ï¼ï¼ï¼":1524,"ä½æ¼é¦":588,"å¹´è³ ":713,"é¢ç© ":770,"å«å·ã":844,"ä¹éç":628,"æç«æ¼":922,"å
§ä¸ä½¿":611,"ä¿ç¾
æ¯":1193,"ï¼é¦æ¸¯":708,"ç©ãå":2798,"å
¬å¸ï¼":691,"å
¬å¸ï¼":864,"è¡ä¸å":1022,"å
±åå":1943,"åå¸æ¼":1717,"åå¸å¨":1578,"ããã":944,"ããã":864,"ï¼æ¼ä¸":674,"ï¼æ¯ä¸":645,"ï¼æ¯ä¸":2670,"ï¼æ¯ä¸":2178,"ï¼æ¯ä¸":1286,"æ¹�
�ã":689,"ããã":1321,"å°æªç±":2697,"å°ï¼ç":2880,"ï¼å¸¸ç":667,"ï¼æ¼ ":2197,"ï¼æ以":698,"ï¼å稱":1386,"ï¼å æ¤":1151,"ï¼å¤ç":1249,"èæ¼ä¸":2897,"ï¼å¸å":3634,"ãåå¸":2903,"ãå°åº¦":793,"ãä¹æ±":625,"ãä¸è¥¿":798,"ãå«å·":850,"ãå±±å¡":668,"被稱æ¼":856,"ãåå·":1250,"大é¸ç":3064,"ã¢ã¢ã¢":1275,"å¸åï¼":3509,"ï¼è±èª":1020,"ï¼è±æ":1336,"義大å©":914,"ï¼ç®å":3420,"西çä¸":773,"ï¼çè":2926,"ï¼ç°¡ç¨±":1929,"å¹³ä¸å
¬":818,"å·¥å¼ç¨®":2892,"ï¼å
æ¬":1128,"ç±³çå°":2889,"ï¼åå":585,"ï¼å¨ ":642,"ï¼å
¶ä¸":943,"ï¼ä»¥å":1003,"ï¼ä¹æ¯":1481,"ï¼�
�¸è¬":1212,"ï¼ä¸»è¦":1207,"ï¼ä½æ¼":2550,"ï¼ä¸¦ä¸":649,"ï¼ä¸å":911,"屬çæ¤":2808,"ä¸ä½¿ã":633,"ä¸ä¸ä¸":628,"æ±è¥¿ã":618,"æ¹ä¸ã":763,"ä¸åç":723,"ç±³è³ ":2729,"ä¹ä¸ã":1752,"ä¸å
¬é":761,"ä¸ä¸ï¼":616},"n_words":[4924775,1867501,309785],"name":"zh-tw"}
\ No newline at end of file
Modified: lucene/dev/trunk/solr/contrib/langid/src/test-files/langid/solr/conf/solrconfig-languageidentifier.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/test-files/langid/solr/conf/solrconfig-languageidentifier.xml?rev=1184754&r1=1184753&r2=1184754&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/test-files/langid/solr/conf/solrconfig-languageidentifier.xml (original)
+++ lucene/dev/trunk/solr/contrib/langid/src/test-files/langid/solr/conf/solrconfig-languageidentifier.xml Sun Oct 16 03:55:30 2011
@@ -62,7 +62,24 @@
</requestHandler>
<updateRequestProcessorChain name="lang_id">
- <processor class="org.apache.solr.update.processor.LanguageIdentifierUpdateProcessorFactory">
+ <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
+ <!-- Can take defaults, invariants and appends just like req handlers-->
+ <lst name="defaults">
+ <bool name="langid">true</bool>
+ <str name="langid.fl">name,subject</str>
+ <bool name="langid.map">true</bool>
+ <str name="langid.langField">language_s</str>
+ <str name="langid.langsField">language_sm</str>
+ <str name="langid.map.lcmap">th:thai</str>
+ <float name="threshold">0.5</float>
+ <str name="langid.fallback">fallback</str>
+ </lst>
+ </processor>
+ <processor class="solr.RunUpdateProcessorFactory" />
+ </updateRequestProcessorChain>
+
+ <updateRequestProcessorChain name="lang_id_alt">
+ <processor class="org.apache.solr.update.processor.LangDetectLanguageIdentifierUpdateProcessorFactory">
<!-- Can take defaults, invariants and appends just like req handlers-->
<lst name="defaults">
<bool name="langid">true</bool>
Added: lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactoryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactoryTest.java?rev=1184754&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactoryTest.java (added)
+++ lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactoryTest.java Sun Oct 16 03:55:30 2011
@@ -0,0 +1,62 @@
+package org.apache.solr.update.processor;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.junit.Test;
+
+public class LangDetectLanguageIdentifierUpdateProcessorFactoryTest extends LanguageIdentifierUpdateProcessorFactoryTestCase {
+ @Override
+ protected LanguageIdentifierUpdateProcessor createLangIdProcessor(ModifiableSolrParams parameters) throws Exception {
+ return new LangDetectLanguageIdentifierUpdateProcessor(_parser.buildRequestFrom(null, parameters, null), resp, null);
+ }
+
+ // this detector actually seems to work better with short docs, so the too-short doc uses an empty text
+ @Override
+ protected SolrInputDocument tooShortDoc() {
+ SolrInputDocument doc = new SolrInputDocument();
+ doc.addField("text", "");
+ return doc;
+ }
+
+ /* we don't return 'un' for the super-short one (this detector thinks it is Hungarian?).
+ * replace this with Japanese
+ */
+ @Test @Override
+ public void testLangIdGlobal() throws Exception {
+ parameters = new ModifiableSolrParams();
+ parameters.add("langid.fl", "name,subject");
+ parameters.add("langid.langField", "language_s");
+ parameters.add("langid.fallback", "un");
+ liProcessor = createLangIdProcessor(parameters);
+
+ assertLang("no", "id", "1no", "name", "Lucene", "subject", "Lucene er et fri/åpen kildekode programvarebibliotek for informasjonsgjenfinning, opprinnelig utviklet i programmeringsspråket Java av Doug Cutting. Lucene støttes av Apache Software Foundation og utgis under Apache-lisensen.");
+ assertLang("en", "id", "2en", "name", "Lucene", "subject", "Apache Lucene is a free/open source information retrieval software library, originally created in Java by Doug Cutting. It is supported by the Apache Software Foundation and is released under the Apache Software License.");
+ assertLang("sv", "id", "3sv", "name", "Maven", "subject", "Apache Maven är ett verktyg utvecklat av Apache Software Foundation och används inom systemutveckling av datorprogram i programspråket Java. Maven används för att automatiskt paketera (bygga) programfilerna till en distribuerbar enhet. Maven används inom samma område som Apache Ant men dess byggfiler är deklarativa till skillnad ifrån Ants skriptbaserade.");
+ assertLang("es", "id", "4es", "name", "Lucene", "subject", "Lucene es un API de código abierto para recuperación de información, originalmente implementada en Java por Doug Cutting. Está apoyado por el Apache Software Foundation y se distribuye bajo la Apache Software License. Lucene tiene versiones para otros lenguajes incluyendo Delphi, Perl, C#, C++, Python, Ruby y PHP.");
+ assertLang("ja", "id", "5ja", "name", "Japanese", "subject", "日本語（にほんご、にっぽんご）は主として、日本で使用されてきた言語である。日本国は法令上、公用語を明記していないが、事実上の公用語となっており、学校教育の「国語」で教えられる。");
+ assertLang("th", "id", "6th", "name", "บทความคัดสรรเดือนนี้", "subject", "อันเนอลีส มารี อันเนอ ฟรังค์ หรือมักรู้จักในภาษาไทยว่า แอนน์ แฟรงค์ เป็นเด็กหญิงชาวยิว เกิดที่เมืองแฟรงก์เฟิร์ต ประเทศเยอรมนี เธอมีชื่อเสียงโด่งดังในฐานะผู้เขียนบันทึกประจำวันซึ่งต่อมาได้รับการตีพิมพ์เป็นหนังสือ บรรยายเหตุการณ์ขณะหลบซ่อนตัวจากการล่าชาวยิวในประเทศเนเธอร์แลนด์ ระหว่างที่ถูกเยอรมนีเข้าครอบครองในช่วงสงครามโลกครั้งที่สอง");
+ assertLang("ru", "id", "7ru", "name", "Lucene", "subject", "The Apache Lucene — это свободная библиотека для высокоскоростного полнотекстового поиска, написанная на Java. Может быть использована для поиска в интернете и других областях компьютерной лингвистики (аналитическая философия).");
+ assertLang("de", "id", "8de", "name", "Lucene", "subject", "Lucene ist ein Freie-Software-Projekt der Apache Software Foundation, das eine Suchsoftware erstellt. Durch die hohe Leistungsfähigkeit und Skalierbarkeit können die Lucene-Werkzeuge für beliebige Projektgrößen und Anforderungen eingesetzt werden. So setzt beispielsweise Wikipedia Lucene für die Volltextsuche ein. Zudem verwenden die beiden Desktop-Suchprogramme Beagle und Strigi eine C#- bzw. C++- Portierung von Lucene als Indexer.");
+ assertLang("fr", "id", "9fr", "name", "Lucene", "subject", "Lucene est un moteur de recherche libre écrit en Java qui permet d'indexer et de rechercher du texte. C'est un projet open source de la fondation Apache mis à disposition sous licence Apache. Il est également disponible pour les langages Ruby, Perl, C++, PHP.");
+ assertLang("nl", "id", "10nl", "name", "Lucene", "subject", "Lucene is een gratis open source, tekst gebaseerde information retrieval API van origine geschreven in Java door Doug Cutting. Het wordt ondersteund door de Apache Software Foundation en is vrijgegeven onder de Apache Software Licentie. Lucene is ook beschikbaar in andere programeertalen zoals Perl, C#, C++, Python, Ruby en PHP.");
+ assertLang("it", "id", "11it", "name", "Lucene", "subject", "Lucene è una API gratuita ed open source per il reperimento di informazioni inizialmente implementata in Java da Doug Cutting. È supportata dall'Apache Software Foundation ed è resa disponibile con l'Apache License. Lucene è stata successivamente reimplementata in Perl, C#, C++, Python, Ruby e PHP.");
+ assertLang("pt", "id", "12pt", "name", "Lucene", "subject", "Apache Lucene, ou simplesmente Lucene, é um software de busca e uma API de indexação de documentos, escrito na linguagem de programação Java. É um software de código aberto da Apache Software Foundation licenciado através da licença Apache.");
+ }
+}
Copied: lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java (from r1183754, lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTest.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java?p2=lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java&p1=lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTest.java&r1=1183754&r2=1184754&rev=1184754&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTest.java (original)
+++ lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java Sun Oct 16 03:55:30 2011
@@ -31,7 +31,7 @@ import org.apache.solr.request.SolrQuery
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.servlet.SolrRequestParsers;
-public class LanguageIdentifierUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
+public abstract class LanguageIdentifierUpdateProcessorFactoryTestCase extends SolrTestCaseJ4 {
protected static SolrRequestParsers _parser;
protected static SolrQueryRequest req;
@@ -189,17 +189,15 @@ public class LanguageIdentifierUpdatePro
return doc;
}
- private SolrInputDocument tooShortDoc() {
+ protected SolrInputDocument tooShortDoc() {
SolrInputDocument doc = new SolrInputDocument();
doc.addField("text", "This text is too short");
return doc;
}
- private LanguageIdentifierUpdateProcessor createLangIdProcessor(ModifiableSolrParams parameters) throws Exception {
- return new LanguageIdentifierUpdateProcessor(_parser.buildRequestFrom(null, parameters, null), resp, null);
- }
+ protected abstract LanguageIdentifierUpdateProcessor createLangIdProcessor(ModifiableSolrParams parameters) throws Exception;
- private void assertLang(String langCode, String... fieldsAndValues) throws Exception {
+ protected void assertLang(String langCode, String... fieldsAndValues) throws Exception {
if(liProcessor == null)
throw new Exception("Processor must be initialized before calling assertLang()");
SolrInputDocument doc = sid(fieldsAndValues);
Added: lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java?rev=1184754&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java (added)
+++ lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java Sun Oct 16 03:55:30 2011
@@ -0,0 +1,27 @@
+package org.apache.solr.update.processor;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.solr.common.params.ModifiableSolrParams;
+
+public class TikaLanguageIdentifierUpdateProcessorFactoryTest extends LanguageIdentifierUpdateProcessorFactoryTestCase {
+ @Override
+ protected LanguageIdentifierUpdateProcessor createLangIdProcessor(ModifiableSolrParams parameters) throws Exception {
+ return new TikaLanguageIdentifierUpdateProcessor(_parser.buildRequestFrom(null, parameters, null), resp, null);
+ }
+}
Modified: lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml?rev=1184754&r1=1184753&r2=1184754&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml Sun Oct 16 03:55:30 2011
@@ -1541,7 +1541,7 @@
-->
<!--
<updateRequestProcessorChain name="langid">
- <processor class="org.apache.solr.update.processor.LanguageIdentifierUpdateProcessorFactory">
+ <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
<str name="langid.fl">text,title,subject,description</str>
<str name="langid.langField">language_s</str>
<str name="langid.fallback">en</str>