1、将字符串'024f'转化为unicode字符:先把该字符串按16进制解析为整数(即码点)
code = int('024f',base=16)
print '%x'%code,'%04x'%code
输出结果:24f 024f。一般选择后者:不足4位时在左侧补0,凑足4个十六进制位(即2个完整字节)
将码点转化为unicode字符
unichr(code)
运行得到unicode编码
u'\u024f'
uc = unichr(code)
print uc, type(uc)
输出字符和类型。特别注意unicode类型处理起来有点不同:长度按字符计数而不是按字节计数,例如len(uc)为1,而编码成utf-8后的str长度为2
ɏ <type 'unicode'>
转gb18030编码(该字符编码后占4个字节;下面输出中的0和6是字节0x30、0x36对应的可打印ASCII字符)
uc.encode('gb18030')
'\x810\xab6'
gbk = uc.encode('gb18030')
print type(gbk)
类型是str
<type 'str'>
# Hex-dump the GB18030 bytes of ``uc``, two lowercase hex digits per byte.
gbk = uc.encode('gb18030')
# bytearray yields integer byte values on both Python 2 and Python 3.
gbkcode = ''.join('%02x' % b for b in bytearray(gbk))
print(gbkcode)
8130ab36
uc.encode('utf-8')
输出
'\xc9\x8f'
解码
utf8 = uc.encode('utf-8')
utf8.decode('utf-8')
输出
u'\u024f'
gbk.decode('gb18030')
输出
u'\u024f'
def writeTxt(fdir, info):
    """Append ``info`` as one line to the ``f0.txt`` file under ``fdir``.

    The target path is ``fdir`` followed by a backslash and ``f0.txt``; it is
    echoed to stdout before the file is opened in append mode.

    Args:
        fdir: Directory prefix of the output file.
        info: Text to append; a newline is written after it.
    """
    path = '%s\\f0.txt' % fdir
    print(path)
    # ``with`` guarantees the handle is closed even if a write raises.
    with open(path, 'a') as f:
        f.write(info)
        f.write('\n')
def getcosins(wordcode, gbkflag=0):
    """Build the hex command string for ``wordcode`` and log it.

    The result is lowercase hex laid out as ``20ff`` + two-digit ``gbkflag``
    + ``00`` + two-digit length + payload.

    Args:
        wordcode: When ``gbkflag == 2``, a single character whose code point
            becomes a 4-digit payload. Otherwise a byte string (for example
            a GB18030 encoding) emitted as two hex digits per byte; a text
            string is also accepted and emitted as ``ord`` of each character.
        gbkflag: Value written into the flag position; ``2`` selects the
            code-point payload and a fixed length of 2.

    Returns:
        The assembled command string, which is also appended to the
        ``f0.txt`` file under ``F:\\zpcs`` through ``writeTxt``.
    """
    if gbkflag == 2:
        # Code-point mode: the length field is fixed at 2 (4 hex digits).
        wordcodelen = 2
        payload = '%04x' % ord(wordcode)
    else:
        wordcodelen = len(wordcode)
        if isinstance(wordcode, (bytes, bytearray)):
            # bytearray yields ints for Python 2 str and Python 3 bytes alike.
            values = bytearray(wordcode)
        else:
            values = [ord(ch) for ch in wordcode]
        payload = ''.join('%02x' % v for v in values)
    cosins = '20ff%02x00%02x' % (gbkflag, wordcodelen) + payload
    # Escaped backslash: same path value, without the invalid '\z' escape.
    writeTxt('F:\\zpcs', cosins)
    return cosins
def writegbkucode(uccode):
    """Emit the command strings for the character with code point ``uccode``.

    Prints the numeric code point and the character, then calls
    ``getcosins`` twice: once with the character itself and flag 2, and once
    with its GB18030 encoding and the default flag.

    Args:
        uccode: Code point written as a hexadecimal string, e.g. ``'024f'``.
    """
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3: chr already returns a text character
    code = int(uccode, base=16)
    print(code)
    uc = to_char(code)
    print(uc)
    getcosins(uc, 2)
    getcosins(uc.encode('gb18030'))
# Example run: emit both command strings for code point U+024F.
writegbkucode('024f')