用于将纯文本(ASCII)转换为GSM 7位字符集的Python库?
有没有一个Python库可以把ASCII数据编码成7位的GSM字符集(用于发送短信)?
5 个回答
2
以上所有的解决方案都不正确。GSM 03.38 编码只使用 7 位来表示一个字符,而上述所有方案都使用了字节对齐的输出,这在大多数情况下与 ASCII 的结果是一样的。这里有一个正确的解决方案,使用了位字符串。
我在使用 Python 的一个额外模块:
pip3 install gsm0338
gsmencode.py:
import sys
import gsm0338
def __create_septets__(octets: bytes) -> "tuple[bytes, int]":
    """Pack 7-bit GSM character codes into a dense octet string.

    Septets are packed least-significant bit first: each new septet is
    placed directly above the bits still waiting in the buffer, and every
    completed group of 8 bits is emitted as one output octet.

    Args:
        octets: One GSM character code per byte. Values are expected to be
            in the range 0-127; this is not checked.

    Returns:
        A ``(packed, spare_bits)`` tuple holding the packed data and the
        number of unused padding bits in the final packed octet.
    """
    num_bits = 0  # number of bits currently buffered in ``data``
    data = 0
    septets = bytearray()
    for gsm_char in octets:
        data |= gsm_char << num_bits
        num_bits += 7
        while num_bits >= 8:
            septets.append(data & 0xff)
            data >>= 8
            num_bits -= 8
    # Flush the leftover bits; the upper bits of this octet are zero padding.
    if num_bits > 0:
        septets.append(data & 0xff)
    # n septets occupy 7*n bits, so the padding is (-7*n) mod 8 == n mod 8.
    return bytes(septets), len(octets) % 8
if __name__ == '__main__':
    # The text to encode is the first command-line argument; exit with a
    # usage hint instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s TEXT" % sys.argv[0])
    # NOTE(review): the 'gsm03.38' codec is presumably registered by the
    # gsm0338 import at the top of the script - confirm.
    octets = sys.argv[1].encode('gsm03.38')
    septets, sparse = __create_septets__(octets)
    print("sparse bits: %d" % sparse)
    print("encoded (hex): %s" % septets.hex())
python3 gsmencode.py Sample
输出:
sparse bits: 6
encoded (hex): d3701bce2e03
3
我从gnibbler的回答中得到了些建议。这里有一个我自己想出来的脚本,灵感来自一个在线转换器:http://smstools3.kekekasvi.com/topic.php?id=288,这个脚本对我来说运行得很好,既可以编码也可以解码。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
gsm = (u"@£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>"
u"?¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑÜ`¿abcdefghijklmnopqrstuvwxyzäöñüà")
ext = (u"````````````````````^```````````````````{}`````\\````````````[~]`"
u"|````````````````````````````````````€``````````````````````````")
def get_encode(currentByte, index, bitRightCount, position, nextPosition, leftShiftCount, bytesLength, bytes):
    """Build one packed output octet and return it as two uppercase hex digits.

    The octet is made of the remaining high bits of the current septet
    (``currentByte >> bitRightCount``) combined with the low bits of the
    following septet shifted up by ``leftShiftCount``.

    Args:
        currentByte: Septet value at the current position.
        index: 1-based slot of the septet within its group of eight; slot 8
            produces no octet.
        bitRightCount: Number of low bits of ``currentByte`` to discard.
        position: Index of the current septet (unused, kept for callers).
        nextPosition: Index of the following septet in ``bytes``.
        leftShiftCount: Left shift applied to the following septet.
        bytesLength: Number of septets in ``bytes``.
        bytes: Sequence of all septet values.

    Returns:
        A two-character uppercase hex string, or ``''`` when ``index`` is 8
        or more.
    """
    if index >= 8:
        return ''
    byte = currentByte >> bitRightCount
    if nextPosition < bytesLength:
        byte |= bytes[nextPosition] << leftShiftCount
    # '%02X' yields the same digits as chr(byte).encode('hex').upper() but
    # does not rely on the Python 2 only 'hex' string codec.
    return '%02X' % (byte & 0xFF)
def getBytes(plaintext):
    """Translate text into a list of GSM septet values.

    Characters found in the module-level ``gsm`` table map to their index.
    Characters found only in ``ext`` map to the escape value 27 followed by
    their ``ext`` index. Characters found in neither table are dropped.

    Args:
        plaintext: UTF-8 encoded bytes, a text string, or any other object
            (which is converted to text first).

    Returns:
        A list of integer septet values.
    """
    text_type = type(u'')
    if isinstance(plaintext, (bytes, bytearray)):
        text = plaintext.decode('utf-8')
    elif isinstance(plaintext, text_type):
        # Already text: converting through str() would fail on non-ASCII
        # input under Python 2.
        text = plaintext
    else:
        text = text_type(plaintext)

    septets = []
    for c in text:
        idx = gsm.find(c)
        if idx != -1:
            septets.append(idx)
            continue
        idx = ext.find(c)
        # "`" is the filler for unused ext slots, so a match on it is not a
        # real extension character and must not be emitted as ESC + 0.
        if idx != -1 and c != u'`':
            septets.extend((27, idx))
    return septets
def gsm_encode(plaintext):
    """Encode text as packed GSM 7-bit data and return it as uppercase hex.

    The text is converted to septets with ``getBytes`` and each septet is
    handed to ``get_encode`` together with its shift amounts.
    """
    septets = getBytes(plaintext)
    total = len(septets)
    parts = []
    for pos, septet in enumerate(septets):
        # Slot runs 1..8 within each group of eight septets; get_encode
        # returns '' for slot 8, so eight septets yield seven octets.
        slot = pos % 8 + 1
        parts.append(
            get_encode(septet, slot, slot - 1, pos, pos + 1, 8 - slot, total, septets)
        )
    return "".join(parts)
def chunks(l, n):
    """Cut *l* into consecutive slices of length *n* (treated as 1 if below 1).

    The final slice may be shorter than *n*.
    """
    size = 1 if n < 1 else n
    pieces = []
    for start in range(0, len(l), size):
        pieces.append(l[start:start + size])
    return pieces
def _gsm_decode_septet(septet, escaped):
    """Translate one septet; return ``(text, escaped)`` for the next septet."""
    if septet == 27:
        # Escape: emits nothing, the next septet is looked up in ``ext``.
        return '', True
    if escaped:
        return ext[septet], False
    return gsm[septet], False


def gsm_decode(codedtext):
    """Decode a hex string of packed GSM 7-bit data into text.

    Args:
        codedtext: Hex digits, two per packed octet.

    Returns:
        The decoded text. Septets are looked up in the module-level ``gsm``
        table, or in ``ext`` when they follow the escape value 27.

    Note:
        After every seventh octet a full extra septet is always emitted, so
        seven zero padding bits at the end decode as ``gsm[0]``.
    """
    pieces = []
    number = 0    # bit buffer; the lowest bits are the next septet
    bitcount = 0  # leftover bits in the buffer once a septet is taken
    escaped = False
    for hexpair in chunks(codedtext, 2):
        # Append the new octet above the bits still waiting in the buffer.
        number += int(hexpair, 16) << bitcount
        bitcount += 1
        char, escaped = _gsm_decode_septet(number % 128, escaped)
        pieces.append(char)
        number >>= 7
        # Every seventh octet leaves a complete extra septet in the buffer.
        if bitcount == 7:
            char, escaped = _gsm_decode_septet(number % 128, escaped)
            pieces.append(char)
            bitcount = 0
            number = 0
    return ''.join(pieces)
18
现在有了 :)
感谢 Chad 指出这个说法不太准确
Python2 版本
# -*- coding: utf8 -*-
# Base character table: gsm_encode uses a character's index in this string
# as its code.
gsm = (u"@£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>"
u"?¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑܧ¿abcdefghijklmnopqrstuvwxyzäöñüà")
# Extension table: characters found here are encoded as 27 followed by their
# index. NOTE(review): "`" appears to be a filler for unused slots - confirm.
ext = (u"````````````````````^```````````````````{}`````\\````````````[~]`"
u"|````````````````````````````````````€``````````````````````````")
def gsm_encode(plaintext):
    """Encode text as unpacked GSM codes and return them as lowercase hex.

    Each character found in the module-level ``gsm`` table becomes one code
    (its index). A character found only in ``ext`` becomes the escape code
    27 followed by its ``ext`` index. Other characters are dropped.

    Args:
        plaintext: Text to encode (a unicode string).

    Returns:
        A string with two lowercase hex digits per code.
    """
    codes = []
    for c in plaintext:
        idx = gsm.find(c)
        if idx != -1:
            codes.append(idx)
            continue
        idx = ext.find(c)
        # "`" is the filler for unused ext slots; matching it would wrongly
        # emit ESC + 0x00 for a backtick in the input.
        if idx != -1 and c != u"`":
            codes.extend((27, idx))
    # Same digits as str.encode('hex'), without the Python 2 only codec.
    return "".join("%02x" % code for code in codes)
# Demo: every character of "Hello World" has the same code in the gsm table
# as in ASCII, so this prints 48656c6c6f20576f726c64.
print gsm_encode(u"Hello World")
输出是十六进制字符串。如果你想要的是二进制流,可以省略最后那一步十六进制编码,直接使用编码后的结果。
Python3 版本
# -*- coding: utf8 -*-
import binascii
# Base character table: gsm_encode uses a character's index in this string
# as its code.
gsm = ("@£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>?"
"¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑܧ¿abcdefghijklmnopqrstuvwxyzäöñüà")
# Extension table: characters found here are encoded as 27 followed by their
# index. NOTE(review): "`" appears to be a filler for unused slots - confirm.
ext = ("````````````````````^```````````````````{}`````\\````````````[~]`"
"|````````````````````````````````````€``````````````````````````")
def gsm_encode(plaintext):
    """Encode text as unpacked GSM codes and return them as hex bytes.

    Each character found in the module-level ``gsm`` table becomes one code
    (its index). A character found only in ``ext`` becomes the escape code
    27 followed by its ``ext`` index. Other characters are dropped.

    Args:
        plaintext: Text to encode.

    Returns:
        A ``bytes`` object with two lowercase hex digits per code.
    """
    codes = bytearray()
    for c in plaintext:
        idx = gsm.find(c)
        if idx != -1:
            codes.append(idx)
            continue
        idx = ext.find(c)
        # "`" is the filler for unused ext slots; matching it would wrongly
        # emit ESC + 0x00 for a backtick in the input.
        if idx != -1 and c != "`":
            codes.extend((27, idx))
    return binascii.b2a_hex(codes)
# Demo: every character of "Hello World" has the same code in the gsm table
# as in ASCII, so this prints b'48656c6c6f20576f726c64'.
print(gsm_encode("Hello World"))