forked from QPC1234/LZWEncoding-1
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlzwEncoding.java
More file actions
106 lines (92 loc) · 3.13 KB
/
Copy pathlzwEncoding.java
File metadata and controls
106 lines (92 loc) · 3.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import java.util.*;
import java.io.*;
public class lzwEncoding
{
final static int INITIAL_TABLE_SIZE = 128;//creates a final variable because 128 is a "magic number" that appears multiple times
final static int MAX_TABLE_SIZE = 55296; //the maximum size of our table
static int[] codeRecency = new int[MAX_TABLE_SIZE];
static ArrayDeque<CodeNode> recentQueue = new ArrayDeque<CodeNode>();
public static HashMap<String,Character> init(HashMap<String,Character> table)
{
// fill the table with the standard ascii 1-128
for(int a = 0; a < INITIAL_TABLE_SIZE; a++)
{
char current = (char)(a);
// (pattern (string) + corresponding ascii (char))
table.put(current+"",current);
}
return table;
}
public static void encode (String input, String outputFileName)
{
// the table containing the pattern and corresponding ascii - "a" -> 'a'
HashMap<String, Character> table = new HashMap<String, Character>();
// fill the table with the standard ascii 1-128
init(table);
try
{
BufferedReader br = new BufferedReader(new FileReader(input));
//prints directly to the outputFile instead of using a stringbuilder so it runs faster
File file = new File(outputFileName);
if (!file.exists()) {
file.createNewFile();
}
PrintWriter pw = new PrintWriter(file);
int current = br.read();
// last char of previous pattern
String prev = (char)current + "";
// the next available ascii/table slot
int num = INITIAL_TABLE_SIZE;
while(current != -1)
{
current = br.read();
// current portion of the text being scanned for new patterns
String temp = prev + (char)current;
// pattern not found
if(!table.containsKey(temp))
{
// encode previous
//prints to output file instead of appending
pw.print(table.get(prev));
// max 256 bc the extended ascii table ends at 255, so we can't represent anything past 255
//the larger the table the more it compresses, so we increased the max table size to a max of 2^15 (32768) (2^16 created some unreadable chars)
// add to the table
if(num < MAX_TABLE_SIZE) {
if(prev.length() > 1){
recentQueue.add(new CodeNode(prev, table.get(prev)));
codeRecency[(int)(table.get(prev)).charValue()]++;
}
table.put(temp, (char)num);
recentQueue.add(new CodeNode(temp, (char)num));
}
else {
CodeNode tempNode = recentQueue.pollFirst();
while(codeRecency[(int)tempNode.code] > 1) {
codeRecency[tempNode.code]-=1;
tempNode = recentQueue.pollFirst();
}
if(prev.length() > 1) {
recentQueue.add(new CodeNode(prev, table.get(prev)));
codeRecency[(int)(table.get(prev)).charValue()]++;
}
table.remove(tempNode.key);
table.put(temp, tempNode.code);
recentQueue.add(new CodeNode(temp, tempNode.code));
}
// increase the next available ascii/table slot
num++;
// reset bc we've already encoded the previous
prev = "";
}
// add to or set the previous
prev += (char)current;
}
br.close();
pw.close();
}
catch(IOException e)
{
System.out.println("IOException");
}
}
}