-
Notifications
You must be signed in to change notification settings - Fork 0
/
huffman_encoding.py
executable file
·65 lines (42 loc) · 1.41 KB
/
huffman_encoding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/python3
from huffman import binary_string_add_one, calculate_probability, canonical_huffman_codes, create_tree, Node, decode_data, encode_canonical_file, encode_file, encoding_to_bytestring
import sys
def calculate_code(node, value=''):
new_value = value + str(node.code)
if(node.left):
calculate_code(node.left, new_value)
if(node.right):
calculate_code(node.right, new_value)
if(not node.left and not node.right):
codes[node.symbol] = new_value
return codes
input_path = sys.argv[1]
output_path = sys.argv[2]
codes = dict()
# read file
with open(input_path, 'r') as file:
data = file.read()
# calculate probabilities
probabilities = calculate_probability(data)
nodes = []
for key in probabilities:
nodes.append(Node(probabilities[key], key))
# create tree
tree = create_tree(nodes)
# nice function for if we want to see the tree
def printTree(node, level=0):
if node != None:
printTree(node.left, level + 1)
print(' ' * 4 * level + '-> ' + str(node.symbol))
printTree(node.right, level + 1)
codes = probabilities
# printTree(tree)
# calculate codes
codes_x = calculate_code(tree, '')
# calculate canonical codes
can_codes = canonical_huffman_codes(codes_x)
# encode table
can_code_string = encoding_to_bytestring(can_codes)
print(can_codes)
# write file
encode_canonical_file(data, can_codes, can_code_string, output_path)