/****************************************************************************** * This program implements Huffman Coding to compress/decompress a file. * * Copyright © 2021 Richard Lesh. All rights reserved. *****************************************************************************/ import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import org.pureprogrammer.Tuple; import org.pureprogrammer.Utils; public class HuffmanCoding { // Store 16-bits of the int static void putInt(int i, BufferedOutputStream ofh) throws IOException { ofh.write((i >> 8) & 0xFF); ofh.write(i & 0xFF); } // Read 16-bits of an int // -1 becomes 65535 static int getInt(BufferedInputStream ifh) throws IOException { int b; b = Utils.getbyte(ifh); int i = b; i <<= 8; b = Utils.getbyte(ifh); i |= b; return i; } // POD (plain old data) type to represent Huffman tree node. // value of -1 is an internal node, otherwise leaf node. static class Node { public int value; public int frequency; public Node left; public Node right; public Node(int v, int f) { value = v; frequency = f; left = null; right = null; } public void write(BufferedOutputStream ofh) throws IOException { _.putInt(value, ofh); } } static Map FREQUENCIES = new HashMap<>(); static List TREE = new ArrayList<>(); static Map> HUFFMAN_CODES = new HashMap<>(); static void bubbleSortReverse(List list) { for (int i = list.size() - 2; i >= 0; --i) { boolean hasSwap = false; for (int j = 0; j <= i; ++j) { if (list.get(j).frequency < list.get(j + 1).frequency || list.get(j).frequency == list.get(j + 1).frequency && list.get(j).value < list.get(j + 1).value) { final Node TEMP = list.get(j + 1); list.set(j + 1, list.get(j)); list.set(j, TEMP); hasSwap = true; } } if (!hasSwap) { break; } } } static void writeTree(final Node node, BufferedOutputStream ofh) throws IOException { node.write(ofh); if (node.value == -1) { writeTree(node.left, ofh); writeTree(node.right, ofh); } } static Node readTree(BufferedInputStream ifh) throws IOException { Node node; node = new Node(getInt(ifh), 0); if (node.value == 65535) { node.left = readTree(ifh); node.right = readTree(ifh); } return node; } static int PARTIAL_BYTE = 0; static int PARTIAL_BITS = 0; static void putBit(boolean b, BufferedOutputStream ofh) throws IOException { PARTIAL_BYTE <<= 1; if (b) { PARTIAL_BYTE |= 1; } ++PARTIAL_BITS; if ((PARTIAL_BITS == 8)) { ofh.write(PARTIAL_BYTE); PARTIAL_BYTE = 0; PARTIAL_BITS = 0; } } static void putCode(int c, BufferedOutputStream ofh) throws IOException { final Tuple.T2 code = HUFFMAN_CODES.get(c); int mask = 1 << (code.second() - 1); for (int i = 0; i < code.second(); ++i) { final boolean b = (code.first() & mask) != 0; putBit(b, ofh); mask >>= 1; } } static void lookupCode(int c, BufferedOutputStream ofh) throws IOException { PARTIAL_BYTE <<= 8; PARTIAL_BYTE |= c; PARTIAL_BITS += 8; while (PARTIAL_BITS > 0) { int mask = 1 << (PARTIAL_BITS - 1); Node node = TREE.get(0); int outputPath = 0; int bitsUsed = 0; while (node.value == 65535) { if (mask == 0) { return; } outputPath <<= 1; if ((PARTIAL_BYTE & mask) == 0) { node = node.left; } else { node = node.right; outputPath |= 1; } ++bitsUsed; mask >>= 1; } ofh.write(node.value); PARTIAL_BITS -= bitsUsed; PARTIAL_BYTE &= ~(outputPath << PARTIAL_BITS); } } static void constructHuffmanCodes(final Node node, final Tuple.T2 code) { if (node.value == -1) { Tuple.T2 newCode = Tuple.makeTuple(code.first() << 1, code.second() + 1); constructHuffmanCodes(node.left, newCode); newCode = Tuple.makeTuple((code.first() << 1) | 1, code.second() + 1); constructHuffmanCodes(node.right, newCode); } else { HUFFMAN_CODES.put(node.value, code); } } static void buildTree() { for (int k : FREQUENCIES.keySet()) { TREE.add(new Node(k, FREQUENCIES.get(k))); } int treeSize = TREE.size(); while (treeSize > 1) { bubbleSortReverse(TREE); final Node NEW_NODE = new Node(-1, 0); NEW_NODE.right = TREE.remove(TREE.size() - 1); NEW_NODE.left = TREE.remove(TREE.size() - 1); NEW_NODE.frequency = NEW_NODE.left.frequency + NEW_NODE.right.frequency; TREE.add(NEW_NODE); treeSize = TREE.size(); } } static void computeFrequencies(String fromFilespec) throws IOException { BufferedInputStream ifh = new BufferedInputStream(new FileInputStream(fromFilespec)); int c; while ((c = Utils.getbyte(ifh)) != -1) { if (!FREQUENCIES.containsKey(c)) { FREQUENCIES.put(c, 0); } FREQUENCIES.put(c, FREQUENCIES{c} + 1); } try {ifh.close();} catch (IOException ex) {}; } static void compressFile(String fromFilespec, String toFilespec) throws IOException { BufferedOutputStream ofh = new BufferedOutputStream(new FileOutputStream(toFilespec)); writeTree(TREE.get(0), ofh); BufferedInputStream ifh = new BufferedInputStream(new FileInputStream(fromFilespec)); int c; while ((c = Utils.getbyte(ifh)) != -1) { putCode(c, ofh); } for (int i = 0; i < 7; ++i) { putBit(false, ofh); } try {ofh.close();} catch (IOException ex) {}; try {ifh.close();} catch (IOException ex) {}; } static void decompressFile(String fromFilespec, String toFilespec) throws IOException { BufferedInputStream ifh = new BufferedInputStream(new FileInputStream(fromFilespec)); BufferedOutputStream ofh = new BufferedOutputStream(new FileOutputStream(toFilespec)); TREE = Arrays.asList(readTree(ifh)); int c; while ((c = Utils.getbyte(ifh)) != -1) { lookupCode(c, ofh); } try {ofh.close();} catch (IOException ex) {}; try {ifh.close();} catch (IOException ex) {}; } public static void main(String[] args) { if (args.length != 3) { System.out.println(Utils.join("", "Syntax: ", "HuffmanCoding", " {fromFilespec} {toFilespec} {C|D}")); System.exit(1); } String fromFilespec = args[0]; String toFilespec = args[1]; String mode = args[2]; try { if (mode.equals("C") ) { computeFrequencies(fromFilespec); buildTree(); final Tuple.T2 startCode = Tuple.makeTuple(0, 0); constructHuffmanCodes(TREE.get(0), startCode); compressFile(fromFilespec, toFilespec); } else if (mode.equals("D") ) { decompressFile(fromFilespec, toFilespec); } else { System.out.println("Bad mode. Only C|D allowed."); } } catch (IOException ex) { System.out.println(Utils.join("", "Error: ", Utils.exceptionMessage(ex))); } } }