我有一个unicode字符串,因此我想检查字符是连续位还是起始位,以便通过简单的程序将unicode字符的数量计算为
#!/usr/bin/env python
# -*- coding: utf-8 -*-
def arg(str):
i = 0
j = 0
print i
for test in str:
print test
value = int(test,16)
if (value & 0xc0) != 0x80:
j=j+1
print "hello"
print j
#return j
def main():
print "inside main"
new = "象形字"
charlen = len(new)
print charlen
tes = new.decode('utf-8')
declen = len(tes)
print declen
data = tes.encode('utf-8')
# print self_len
enclen = len(data)
print enclen
print data
arg(data)
if __name__ == "__main__":
main()
运行该代码将错误显示为
象形字[Decode error - output not utf-8]
Traceback (most recent call last):
File "/Users/laxmi518/Documents/laxmi/code/C/python-c/python_unicode.py", line 69, in <module>
main()
File "/Users/laxmi518/Documents/laxmi/code/C/python-c/python_unicode.py", line 52, in main
arg(data)
File "/Users/laxmi518/Documents/laxmi/code/C/python-c/python_unicode.py", line 16, in arg
value = int(test,16)
ValueError: invalid literal for int() with base 16: '\xe8'
[Finished in 0.1s with exit code 1]
目前没有回答
相关问题 更多 >
编程相关推荐