Python 判断 Unicode 字符类型
程序员文章站
2022-04-19 23:18:47
def is_chinese(uchar):
"""判断一个unicode是否是汉字"""
if uchar >= u'\u4e00' and uc...
# Python 2 needs unichr() to build a Unicode character from a code point
# (its chr() only covers 0-255); Python 3 dropped unichr() because chr()
# already returns a Unicode character.
try:
    _unichr = unichr
except NameError:
    _unichr = chr


def is_chinese(uchar):
    """Return True if ``uchar`` lies in U+4E00..U+9FA5 (CJK ideographs)."""
    return u'\u4e00' <= uchar <= u'\u9fa5'


def is_number(uchar):
    """Return True if ``uchar`` is an ASCII digit (U+0030..U+0039)."""
    return u'\u0030' <= uchar <= u'\u0039'


def is_alphabet(uchar):
    """Return True if ``uchar`` is an ASCII letter (A-Z or a-z)."""
    return (u'\u0041' <= uchar <= u'\u005a') or (u'\u0061' <= uchar <= u'\u007a')


def is_other(uchar):
    """Return True if ``uchar`` is neither Chinese, a digit, nor a letter."""
    return not (is_chinese(uchar) or is_number(uchar) or is_alphabet(uchar))


def b2q(uchar):
    """Convert one half-width character to its full-width form.

    Characters outside the printable ASCII range 0x20..0x7e are returned
    unchanged. ``uchar`` must be a single character (``ord`` is applied).
    """
    inside_code = ord(uchar)
    if inside_code < 0x0020 or inside_code > 0x7e:
        # Not a half-width character: nothing to convert.
        return uchar
    if inside_code == 0x0020:
        # Space is the one exception to the fixed offset: U+0020 <-> U+3000.
        inside_code = 0x3000
    else:
        # Every other character obeys: half-width = full-width - 0xfee0.
        inside_code += 0xfee0
    return _unichr(inside_code)


def q2b(uchar):
    """Convert one full-width character to its half-width form.

    If the converted code point does not land in the printable ASCII range
    0x20..0x7e, the input was not a full-width form and is returned unchanged.
    ``uchar`` must be a single character (``ord`` is applied).
    """
    inside_code = ord(uchar)
    if inside_code == 0x3000:
        # Ideographic space maps to the ASCII space.
        inside_code = 0x0020
    else:
        inside_code -= 0xfee0
    if inside_code < 0x0020 or inside_code > 0x7e:
        # Result is not a half-width character: keep the original.
        return uchar
    return _unichr(inside_code)


def stringq2b(ustring):
    """Convert every full-width character in ``ustring`` to half-width."""
    return "".join(q2b(uchar) for uchar in ustring)


def uniform(ustring):
    """Normalise ``ustring``: full-width to half-width, then lower-case."""
    return stringq2b(ustring).lower()


def string2list(ustring):
    """Split ``ustring`` on characters for which ``is_other`` is true.

    Returns the non-empty runs of Chinese characters, letters and digits in
    order. Runs are only broken by "other" characters, so adjacent Chinese
    characters, letters and digits stay together in one item.
    """
    retlist = []
    utmp = []
    for uchar in ustring:
        if is_other(uchar):
            # A separator ends the current run; consecutive separators
            # produce no empty items.
            if utmp:
                retlist.append("".join(utmp))
                utmp = []
        else:
            utmp.append(uchar)
    if utmp:
        # Flush the trailing run that was not followed by a separator.
        retlist.append("".join(utmp))
    return retlist


if __name__ == "__main__":
    # Round-trip every printable ASCII character through b2q and q2b.
    for i in range(0x0020, 0x007f):
        print("%s %s" % (q2b(b2q(_unichr(i))), b2q(_unichr(i))))

    # Exercise uniform() and string2list().
    ustring = u'中国 人名a高频a'
    ustring = uniform(ustring)
    ret = string2list(ustring)
    print(ret)
上一篇: Python模块搜索路径