如何在创建Huffman cod时处理'/n'、'/t'和类似的ascii键

2024-05-08 19:46:36 发布

您现在位置:Python中文网/ 问答频道 /正文

对于我的项目,我必须用哈夫曼算法压缩一个文件。我明白了大部分,但是我很难处理其他ASCII字符,比如换行符,制表符,等等,这个是我到目前为止所做的,但是我无法在处理文件创建时获得其他字符。你知道吗

import operator

def getFreq(mylst):
    dct = {'a':0,'b':0,'c':0,'d':0,'e':0,'f':0,'g':0,'h':0,'i':0,'j':0,'k':0,'l':0,
       'm':0,'n':0,'o':0,'p':0,'q':0,'r':0,'s':0,'t':0,'u':0,'v':0,'w':0,'x':0,'y':0,'z':0,' ':0,
       'A':0,'B':0,'C':0,'D':0,'E':0,'F':0,'G':0,'H':0,'I':0,'J':0,'K':0,'L':0,
       'M':0,'N':0,'O':0,'P':0,'Q':0,'R':0,'S':0,'T':0,'U':0,'V':0,'W':0,'X':0,'Y':0,'Z':0,'.':0,',':0,
       '1':0,'2':0,'3':0,'4':0,'5':0,'6':0,'7':0,'8':0,'9':0,'0':0, '-':0,'(':0, ')':0}

    for k, v in dct.items():
        for i in mylst:
            if i == k:
                dct[k] += 1
                up_dct = sorted(dct.items(), key=operator.itemgetter(1), reverse=True)

    srt_dct = dict((k, v) for k, v in up_dct)

    return srt_dct


def assign_code(nodes, label, result, prefix = ''):
    childs = nodes[label]
    tree = {}
    if len(childs) == 2:
        tree['0'] = assign_code(nodes, childs[0], result, prefix+'0')
        tree['1'] = assign_code(nodes, childs[1], result, prefix+'1')
        return tree
    else:
        result[label] = prefix
        return label

def Huffman_code(_vals):
    vals = _vals.copy()
    nodes = {}
    for n in vals.keys(): # leafs initialization
        nodes[n] = []

    while len(vals) > 1: # binary tree creation
        s_vals = sorted(vals.items(), key=lambda x:x[1])
        a1 = s_vals[0][0]
        a2 = s_vals[1][0]
        vals[a1+a2] = vals.pop(a1) + vals.pop(a2)
        nodes[a1+a2] = [a1, a2]
    code = {}
    root = a1+a2
    tree = {}
    tree = assign_code(nodes, root, code)   # assignment of the code for the given binary tree
    return code, tree

r_file = open('test.txt', 'r')

ro = r_file.read()


lst = list(ro)

freq = getFreq(lst)

code, tree = Huffman_code(freq)

encoded = ''.join([code[t] for t in ro])
print('Encoded text:',encoded)

w_file = open('encrypt.txt','wt')
w_file.write(encoded)
w_file.close()

d_file = open('encrypt.txt', 'r')

dec_ode = d_file.read()

decoded = []
i = 0
while i < len(dec_ode): # decoding using the binary graph
    ch = dec_ode[i]
    act = tree[ch]
    while not isinstance(act, str):
        i += 1
        ch = dec_ode[i]
        act = act[ch]
    decoded.append(act)
    i += 1


test2_file = open('decode.txt', 'wt')
test2_file.write(''.join(decoded))

test2_file.close()

print('Decoded text:',''.join(decoded))

Tags: intreea2forreturna1coderesult
1条回答
网友
1楼 · 发布于 2024-05-08 19:46:36

请尝试将此作为getFreq函数:

def getFreq(mylst):
    counters = [0 for _ in range(256)]
    for c in mylst:
        counters[ord(c)] += 1
    return { chr(c): v for c, v in enumerate(counters) }

您需要以字节而不是不同字符的形式查看字符列表。此函数将输入字符转换为相应的ASCII值,并使用该值索引到计数器列表中。例如,每次遇到acounters的第97个元素都会递增。然后在计算完所有字符之后counters被转换回程序所期望的dict。你知道吗

相关问题 更多 >