I am learning about regular expressions (regex) for English, and although some of the concepts seem like they would apply to other languages such as Japanese, I feel as if ma…
For Python:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Pick out runs of kanji, hiragana and katakana from a string using ``re``.

Written so that the same source runs on both Python 2 and Python 3.
"""
import re
import sys

kanji = u'漢字'
hiragana = u'ひらがな'
katakana = u'カタカナ'
# Named ``text`` rather than ``str`` so the builtin type is not shadowed.
text = kanji + hiragana + katakana

# Code points U+4E00-U+9FFF. Close to, but NOT the same as, u'[一-龠々]+':
# that literal form stops at 龠 (U+9FA0) and additionally contains the
# iteration mark 々 (U+3005), which this range does not match.
KANJI_PATTERN = u'[\u4E00-\u9FFF]+'
# Code points U+3040-U+309F plus the prolonged sound mark ー (U+30FC).
# Approximately u'[ぁ-んー]+', which only spans U+3041-U+3093.
HIRAGANA_PATTERN = u'[\u3040-\u309Fー]+'
# Code points U+30A0-U+30FF. Approximately u'[ァ-ヾ]+' (U+30A1-U+30FE).
KATAKANA_PATTERN = u'[\u30A0-\u30FF]+'


def first_run(pattern, subject):
    """Return the first substring of *subject* matching *pattern*.

    Returns None when nothing matches, instead of failing on
    ``None.group()``. ``re.U`` is passed exactly as the original script did.
    """
    found = re.search(pattern, subject, re.U)
    if found is None:
        return None
    return found.group()


def emit(value):
    """Print the unicode string *value*; UTF-8 encoded first on Python 2."""
    if sys.version_info[0] < 3:
        # Python 2: keep the original behaviour of writing UTF-8 bytes.
        value = value.encode('utf-8')
    # Single parenthesised argument: valid both as the Python 2 print
    # statement and as the Python 3 print function.
    print(value)


# Expected output, one line each: 漢字 / ひらがな / カタカナ
for pattern in (KANJI_PATTERN, HIRAGANA_PATTERN, KATAKANA_PATTERN):
    run = first_run(pattern, text)
    if run is not None:
        emit(run)