Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 104 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,111 @@ Using this library in a Node.js project can be done as follows:
console.log('Detected language of provided text is [' + language + ']');
});

#### Languages Supported: 100 ####

* "ab": Abkhazian
* "af": Afrikaans
* "ar": Arabic
* "az": Azerbaijani
* "be": Belarusian
* "bg": Bulgarian
* "bn": Bengali
* "bo": Tibetan
* "br": Breton
* "ca": Catalan, Valencian
* "ceb": Cebuano
* "cs": Czech
* "cy": Welsh
* "da": Danish
* "de": German
* "el": Modern Greek
* "en": English
* "eo": Esperanto
* "es": Spanish, Castilian
* "et": Estonian
* "eu": Basque
* "fa": Persian
* "fi": Finnish
* "fo": Faroese
* "fr": French
* "fy": Western Frisian
* "gd": Scottish Gaelic, Gaelic
* "gl": Galician
* "gu": Gujarati
* "ha": Hausa
* "haw": Hawaiian
* "he": Hebrew
* "hi": Hindi
* "hr": Croatian
* "hu": Hungarian
* "hy": Armenian
* "id": Indonesian
* "is": Icelandic
* "it": Italian
* "ja": Japanese
* "ka": Georgian
* "kk": Kazakh
* "km": Central Khmer
* "kn": Kannada
* "ko": Korean
* "ku": Kurdish
* "ky": Kirghiz, Kyrgyz
* "la": Latin
* "lo": Lao
* "lt": Lithuanian
* "lv": Latvian
* "mg": Malagasy
* "mk": Macedonian
* "ml": Malayalam
* "mn": Mongolian
* "mr": Marathi
* "ms": Malay (macrolanguage)
* "nd": North Ndebele
* "ne": Nepali
* "nl": Dutch, Flemish
* "nn": Norwegian Nynorsk
* "no": Norwegian
* "nso": Pedi, Northern Sotho, Sepedi
* "or": Oriya
* "pa": Panjabi, Punjabi
* "pl": Polish
* "ps": Pushto, Pashto
* "pt": Portuguese
* "pt-BR": Portuguese (Brazil)
* "pt-PT": Portuguese (Portugal)
* "ro": Romanian, Moldavian, Moldovan
* "ru": Russian
* "sa": Sanskrit
* "sh": Serbo-Croatian
* "si": Sinhala, Sinhalese
* "sk": Slovak
* "sl": Slovenian, Slovene
* "so": Somali
* "sq": Albanian
* "sr": Serbian
* "sv": Swedish
* "sw": Swahili (macrolanguage)
* "ta": Tamil
* "te": Telugu
* "th": Thai
* "tl": Tagalog
* "tlh": Klingon, tlhIngan-Hol
* "tn": Tswana, Setswana
* "tr": Turkish
* "ts": Tsonga
* "tw": Twi
* "uk": Ukrainian
* "ur": Urdu
* "uz": Uzbek
* "ve": Venda
* "vi": Vietnamese
* "xh": Xhosa
* "zh": Chinese
* "zh-TW": Chinese (Taiwan)
* "zu": Zulu

#### Feedback ####

Please note this library is in the early stages of development. If you find any bugs or issues, please file them on this project and I'll take a look.

Please also feel free to catch me on Twitter [@richtibbett](http://twitter.com/richtibbett/).
Please also feel free to catch me on Twitter [@richtibbett](http://twitter.com/richtibbett/).
2 changes: 1 addition & 1 deletion lib/_languageData.js

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions lib/guessLanguage.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@
"pl": "Polish",
"ps": "Pashto",
"pt": "Portuguese",
"pt_PT": "Portuguese (Portugal)",
"pt_BR": "Portuguese (Brazil)",
"pt-PT": "Portuguese (Portugal)",
"pt-BR": "Portuguese (Brazil)",
"ro": "Romanian",
"ru": "Russian",
"sa": "Sanskrit",
Expand Down Expand Up @@ -141,7 +141,7 @@
"vi": "Vietnamese",
"xh": "Xhosa",
"zh": "Chinese",
"zh_TW": "Traditional Chinese (Taiwan)",
"zh-TW": "Traditional Chinese (Taiwan)",
"zu": "Zulu",
};

Expand Down Expand Up @@ -233,7 +233,7 @@
"uz": 26540,
"vi": 26550,
"zh": 26065,
"zh_TW": 22,
"zh-TW": 22,
};

var SINGLETONS = [
Expand All @@ -252,7 +252,7 @@
["Thai", "th"],
["Lao", "lo"],
["Tibetan", "bo"],
["Burmese", "my"],
["Burmese", "my"], // Unimplemented
["Georgian", "ka"],
["Mongolian", "mn"],
["Khmer", "km"]
Expand All @@ -266,7 +266,7 @@
var CYRILLIC = ["ru", "uk", "kk", "uz", "mn", "sr", "mk", "bg", "ky"];
var ARABIC = ["ar", "fa", "ps", "ur"];
var DEVANAGARI = ["hi", "ne"];
var PT = ["pt_BR", "pt_PT"];
var PT = ["pt-BR", "pt-PT"];

// Unicode char greedy regex block range matchers
var unicodeBlockTests = {
Expand Down