定义

霍夫曼树：假设有n个权值,可以构造一棵具有n个叶子节点的二叉树，其中带权路径长度WPL最小的二叉树称作最优二叉树，也叫霍夫曼树。哈夫曼树是带权路径长度最短的树，权值较大的结点离根较近。

构造过程

假设有n个权值，则构造出的哈夫曼树有n个叶子结点。 n个权值分别设为 w1、w2、…、wn，则哈夫曼树的构造规则为：
(1) 将w1、w2、…，wn看成是有n 棵树的森林(每棵树仅有一个结点)；
(2) 在森林中选出两个根结点的权值最小的树合并，作为一棵新树的左、右子树，且新树的根结点权值为其左、右子树根结点权值之和；
(3)从森林中删除选取的两棵树，并将新树加入森林；
(4)重复(2)、(3)步，直到森林中只剩一棵树为止，该树即为所求得的哈夫曼树。

前缀编码

前缀编码是指对字符集进行编码时，要求字符集中任一字符的编码都不是其它字符的编码的前缀。反例：设有abcd需要编码表示，其中a=0、b=10、c=110、d=11，由于d的编码11是c的编码110的前缀，比特串110既可以译为c，也可以译为da，译码结果不唯一，所以这组编码不是前缀编码。

利用赫夫曼树进行信息压缩

例如我要发送语句“you see see you one day day” 一共是27个字符，一个字节八位那就是216位。但如果构造霍夫曼树得到前缀编码则可将数据压缩到 11个字节。

下面是百度百科给出的信息压缩的例子：
ABFACGCAHGBBAACECDFGFAAEABBB
1.统计：A(8) B(6) C(4) D(1) E(2) F(3) G(3)H(1)
2.构造Huffman树
3.得到Huffman编码
A： 01
B： 11
C： 001
D：00000
E： 0001
F： 100
G： 101
H：00001
字符串新编码长度：8×2+6×2+4×3+1×5+2×4+3×3+3×3+1×5=76

下面给出利用霍夫曼编码实现信息压缩的代码示例：

树节点

package HuffmanTree;

/**
 * A node of the Huffman tree.
 *
 * <p>Leaves carry the byte value they represent in {@link #data}; internal
 * nodes are created with {@code data == null} and a weight equal to the sum
 * of the weights of their two children.
 *
 * <p>The natural ordering is by weight, <em>descending</em>, so after sorting
 * a list of nodes the two lightest nodes are at the end of the list.
 */
public class Node implements Comparable<Node> {
	// Byte value stored in this node; null for internal (non-leaf) nodes
	Byte data;
	// Weight of the node (occurrence count, or the sum of the children's weights)
	int weight;
	// Left and right children; both null for a leaf
	Node leftSon;
	Node rightSon;

	/**
	 * Creates a node without children.
	 *
	 * @param data   the byte value for a leaf, or {@code null} for an internal node
	 * @param weight the weight of the node
	 */
	public Node(Byte data,int weight){
		this.data = data;
		this.weight = weight;
	}

	/**
	 * Orders nodes by weight from largest to smallest.
	 *
	 * @param o the node to compare with
	 * @return a negative value if this node is heavier than {@code o}, zero if
	 *         both have the same weight, a positive value if it is lighter
	 */
	@Override
	public int compareTo(Node o) {
		// Integer.compare avoids the overflow that "this.weight - o.weight" can produce
		return Integer.compare(o.weight, this.weight);
	}

	@Override
	public String toString(){
		return "Node [data="+data+",weight="+weight+"]";
	}
}

压缩文件的过程与上面百度百科给的示例差不多。

package HuffmanTree;

import java.io.*;
import java.util.*;

/**
 * Huffman coding, commonly used to compress data.
 *
 * <p>The class can compress a byte array in memory (see {@link #main}) and
 * compress / restore whole files (see {@link #zipFile} and {@link #unZip}).
 *
 * <p>A compressed file consists of three items written through an
 * {@link ObjectOutputStream}: the packed {@code byte[]}, the code table
 * ({@code Map<Byte, String>}) and an {@code int} holding the exact number of
 * encoded bits. Files that lack the trailing {@code int} are still accepted
 * and decoded on a best-effort basis.
 *
 * @author 1
 */
public class HuffmanCode {
	/** Marker meaning "the exact number of encoded bits is not known". */
	private static final int UNKNOWN_BIT_COUNT = -1;

	// No longer used by this class; kept only because the field is package-visible.
	static StringBuilder sb = new StringBuilder();
	// Code table of the most recent compression; refreshed on every run.
	static Map<Byte, String> huffCodes = new HashMap<Byte, String>();

	/** Result of one compression run: packed bits, their exact count and the code table. */
	private static final class Encoded {
		// Packed bits; the last byte holds the remaining bits right-aligned
		final byte[] data;
		// Exact number of meaningful bits stored in data
		final int bitCount;
		// Code table (byte value -> bit string) used to produce data
		final Map<Byte, String> codes;

		Encoded(byte[] data, int bitCount, Map<Byte, String> codes) {
			this.data = data;
			this.bitCount = bitCount;
			this.codes = codes;
		}
	}

	/**
	 * Demo: compresses a short message, prints the sizes before and after,
	 * then decodes it again and prints both byte arrays and the restored text.
	 *
	 * <p>To try the file round trip, call {@code zipFile(src, dst)} followed by
	 * {@code unZip(dst, restored)}.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args){
		String msg = "you see see you one day day";
		byte[] bytes = msg.getBytes();
		// Huffman-encode the message
		Encoded encoded = huffmanZip(bytes);
		byte[] b = encoded.data;
		System.out.println(bytes.length);
		System.out.println(b.length);
		// Decode it again with the same code table
		byte[] deBytes = decode(encoded.codes, b, encoded.bitCount);
		System.out.println(Arrays.toString(bytes));
		System.out.println(Arrays.toString(deBytes));
		System.out.println(new String(deBytes));
	}

	/**
	 * Compresses a file.
	 *
	 * @param src path of the file to compress
	 * @param dst path of the compressed file to create
	 * @throws IOException if reading {@code src} or writing {@code dst} fails
	 */
	public static void zipFile(String src,String dst) throws IOException{
		byte[] original = readAll(src);
		Encoded encoded = huffmanZip(original);
		try (OutputStream os = new FileOutputStream(dst);
				ObjectOutputStream oos = new ObjectOutputStream(os)) {
			oos.writeObject(encoded.data);
			// The code table is stored with the data so the file can be decoded later
			oos.writeObject(encoded.codes);
			// Written last so readers that only expect the two objects above still work
			oos.writeInt(encoded.bitCount);
		}
	}

	/**
	 * Restores a file produced by {@link #zipFile}.
	 *
	 * <p>NOTE: the archive is read with Java deserialization, which is unsafe
	 * for untrusted input; only open files you created yourself.
	 *
	 * @param src path of the compressed file
	 * @param dst path of the restored file to create
	 * @throws Exception if the file cannot be read, is not a valid archive,
	 *                   or the restored data cannot be written
	 */
	public static void unZip(String src,String dst) throws Exception{
		byte[] packed;
		Map<Byte, String> codes;
		int bitCount;
		try (InputStream is = new FileInputStream(src);
				ObjectInputStream ois = new ObjectInputStream(is)) {
			packed = (byte[]) ois.readObject();
			// The archive format guarantees a Map<Byte, String> at this position
			@SuppressWarnings("unchecked")
			Map<Byte, String> table = (Map<Byte, String>) ois.readObject();
			codes = table;
			bitCount = readOptionalBitCount(ois);
		}
		if (packed == null || codes == null) {
			throw new IOException("Not a valid Huffman archive: " + src);
		}
		byte[] restored = decode(codes, packed, bitCount);
		try (OutputStream os = new FileOutputStream(dst)) {
			os.write(restored);
		}
	}

	/** Reads the trailing bit count, or returns UNKNOWN_BIT_COUNT if the archive has none. */
	private static int readOptionalBitCount(ObjectInputStream ois) throws IOException {
		try {
			return ois.readInt();
		} catch (EOFException ignored) {
			// Archive written without the bit count: fall back to best-effort decoding
			return UNKNOWN_BIT_COUNT;
		}
	}

	/** Reads the whole file into memory, looping until end of stream. */
	private static byte[] readAll(String path) throws IOException {
		try (InputStream in = new FileInputStream(path)) {
			ByteArrayOutputStream buffer = new ByteArrayOutputStream();
			byte[] chunk = new byte[8192];
			int read;
			while ((read = in.read(chunk)) != -1) {
				buffer.write(chunk, 0, read);
			}
			return buffer.toByteArray();
		}
	}

	/**
	 * Compresses a byte array: counts the bytes, builds the Huffman tree,
	 * derives the code table, prints it and packs the encoded bits.
	 * The static {@code huffCodes} table is replaced with the new table.
	 *
	 * @param bytes the data to compress; may be empty
	 * @return the packed data together with its bit count and code table
	 */
	private static Encoded huffmanZip(byte[] bytes) {
		// Count how often each byte occurs and wrap every count in a leaf node
		List<Node> nodes = getNodes(bytes);
		Node tree = createHuffmanTree(nodes);
		Map<Byte, String> codes = getCodes(tree);
		System.out.println(codes);
		// Publish a fresh table so codes from an earlier run cannot leak into this one
		huffCodes.clear();
		huffCodes.putAll(codes);
		return zip(bytes, codes);
	}

	/**
	 * Decodes packed Huffman data with the given code table.
	 *
	 * @param codes    code table (byte value -> bit string)
	 * @param bytes    packed data as produced by {@code zip}
	 * @param bitCount exact number of encoded bits, or {@code UNKNOWN_BIT_COUNT}
	 *                 when it is not available; in that case leading zero bits
	 *                 of the last byte cannot be recovered
	 * @return the decoded bytes
	 * @throws IllegalArgumentException if {@code bitCount} does not fit
	 *         {@code bytes} or the bits do not form a sequence of known codes
	 */
	private static byte[] decode(Map<Byte, String> codes, byte[] bytes, int bitCount) {
		String bits = toBitString(bytes, bitCount);
		// Invert the table: bit string -> byte value
		Map<String, Byte> symbolsByCode = new HashMap<String, Byte>();
		for (Map.Entry<Byte, String> entry : codes.entrySet()) {
			symbolsByCode.put(entry.getValue(), entry.getKey());
		}
		ByteArrayOutputStream decoded = new ByteArrayOutputStream();
		int start = 0;
		while (start < bits.length()) {
			// Grow the candidate one bit at a time until it matches a code
			int end = start + 1;
			Byte symbol = symbolsByCode.get(bits.substring(start, end));
			while (symbol == null) {
				if (end == bits.length()) {
					throw new IllegalArgumentException(
							"Bits from position " + start + " do not match any Huffman code");
				}
				end++;
				symbol = symbolsByCode.get(bits.substring(start, end));
			}
			decoded.write(symbol.byteValue());
			start = end;
		}
		return decoded.toByteArray();
	}

	/**
	 * Expands packed bytes into a string of '0' and '1' characters.
	 * Every byte but the last contributes 8 bits; the last byte contributes
	 * the remaining bits of {@code bitCount}, taken from its low-order end.
	 */
	private static String toBitString(byte[] bytes, int bitCount) {
		if (bytes.length == 0) {
			if (bitCount > 0) {
				throw new IllegalArgumentException(
						"Bit count " + bitCount + " given for empty data");
			}
			return "";
		}
		int lastIndex = bytes.length - 1;
		StringBuilder bits = new StringBuilder();
		for (int i = 0; i < lastIndex; i++) {
			appendLowBits(bits, bytes[i], 8);
		}
		if (bitCount == UNKNOWN_BIT_COUNT) {
			// Best effort: masking keeps a negative byte at 8 bits instead of 32,
			// but leading zeros of a short final group are lost
			bits.append(Integer.toBinaryString(bytes[lastIndex] & 0xFF));
		} else {
			int lastBits = bitCount - 8 * lastIndex;
			if (lastBits < 1 || lastBits > 8) {
				throw new IllegalArgumentException(
						"Bit count " + bitCount + " does not fit " + bytes.length + " byte(s)");
			}
			appendLowBits(bits, bytes[lastIndex], lastBits);
		}
		return bits.toString();
	}

	/** Appends the {@code count} low-order bits of {@code value}, most significant first. */
	private static void appendLowBits(StringBuilder bits, byte value, int count) {
		for (int shift = count - 1; shift >= 0; shift--) {
			bits.append(((value >> shift) & 1) == 1 ? '1' : '0');
		}
	}

	/**
	 * Turns a byte array into a list of leaf nodes, one per distinct byte
	 * value, weighted by the number of occurrences.
	 *
	 * @param bytes the data to analyse
	 * @return a mutable list of leaf nodes; empty if {@code bytes} is empty
	 */
	private static List<Node> getNodes(byte[] bytes) {
		// Occurrence count per byte value
		Map<Byte, Integer> counts = new HashMap<Byte, Integer>();
		for (byte b : bytes) {
			Integer count = counts.get(b);
			counts.put(b, count == null ? 1 : count + 1);
		}
		List<Node> nodes = new ArrayList<Node>();
		for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
			nodes.add(new Node(entry.getKey(), entry.getValue()));
		}
		return nodes;
	}

	/**
	 * Builds the Huffman tree by repeatedly merging the two lightest trees.
	 * The given list is consumed in the process.
	 *
	 * @param nodes the leaf nodes
	 * @return the root of the tree, or {@code null} if {@code nodes} is empty
	 */
	private static Node createHuffmanTree(List<Node> nodes) {
		if (nodes.isEmpty()) {
			return null;
		}
		while (nodes.size() > 1) {
			// Node sorts by weight descending, so the two lightest trees are at the end
			Collections.sort(nodes);
			Node left = nodes.remove(nodes.size() - 1);
			Node right = nodes.remove(nodes.size() - 1);
			// The merged tree weighs as much as both subtrees together
			Node parent = new Node(null, left.weight + right.weight);
			parent.leftSon = left;
			parent.rightSon = right;
			nodes.add(parent);
		}
		return nodes.get(0);
	}

	/**
	 * Derives the code table from a Huffman tree: going left appends "0",
	 * going right appends "1".
	 *
	 * @param tree the root of the tree, or {@code null} for empty input
	 * @return a new table mapping each byte value to its bit string; empty if
	 *         {@code tree} is {@code null}
	 */
	private static Map<Byte, String> getCodes(Node tree) {
		Map<Byte, String> codes = new HashMap<Byte, String>();
		if (tree == null) {
			return codes;
		}
		if (tree.data != null) {
			// Only one distinct byte: the root is a leaf and still needs a one-bit code
			codes.put(tree.data, "0");
			return codes;
		}
		collectCodes(tree.leftSon, "0", codes);
		collectCodes(tree.rightSon, "1", codes);
		return codes;
	}

	/** Records the code of every leaf below {@code node}; {@code path} is the code of {@code node}. */
	private static void collectCodes(Node node, String path, Map<Byte, String> codes) {
		if (node.data != null) {
			codes.put(node.data, path);
			return;
		}
		collectCodes(node.leftSon, path + "0", codes);
		collectCodes(node.rightSon, path + "1", codes);
	}

	/**
	 * Encodes the data with the given code table and packs the bits into bytes.
	 * Bits are packed 8 per byte; a final group shorter than 8 bits is stored
	 * right-aligned in the last byte.
	 *
	 * @param bytes     the data to encode
	 * @param huffCodes code table covering every byte value in {@code bytes}
	 * @return the packed bytes with their exact bit count and the table used
	 * @throws IllegalArgumentException if a byte has no code in the table
	 */
	private static Encoded zip(byte[] bytes, Map<Byte, String> huffCodes) {
		// Concatenate the codes of all bytes into one bit string
		StringBuilder bits = new StringBuilder();
		for (byte b : bytes) {
			String code = huffCodes.get(b);
			if (code == null) {
				throw new IllegalArgumentException("No Huffman code for byte value " + b);
			}
			bits.append(code);
		}
		int bitCount = bits.length();
		byte[] packed = new byte[(bitCount + 7) / 8];
		int index = 0;
		for (int i = 0; i < bitCount; i += 8) {
			int end = Math.min(i + 8, bitCount);
			packed[index++] = (byte) Integer.parseInt(bits.substring(i, end), 2);
		}
		return new Encoded(packed, bitCount, huffCodes);
	}

}

运行结果：
{100=1110, 117=1011, 32=01, 101=00, 115=1010, 97=11111, 110=11110, 111=100, 121=110}
27
11
[121, 111, 117, 32, 115, 101, 101, 32, 115, 101, 101, 32, 121, 111, 117, 32, 111, 110, 101, 32, 100, 97, 121, 32, 100, 97, 121]
[121, 111, 117, 32, 115, 101, 101, 32, 115, 101, 101, 32, 121, 111, 117, 32, 111, 110, 101, 32, 100, 97, 121, 32, 100, 97, 121]
you see see you one day day

来源：CSDN

作者：March.

链接：https://blog.csdn.net/qq_43635212/article/details/104087504

标签

霍夫曼

哈夫曼树