#include"pch.h"
#define _CRT_SECURE_NO_WARNINGS//避免strcpy()不能使用
#include <stdio.h>
#include<iostream>
#include<string.h>
#include<iomanip>//需要控制格式
// <iomanip> declares stream manipulators.
// Commonly used ones: setw(int) sets the field width; left / right select the alignment; setprecision(int) sets the floating-point precision.
using namespace std;
using std::strcpy;
// Huffman code table: a dynamically allocated array of pointers to
// dynamically allocated C strings (one code string per symbol).
typedef char** HuffmanCode;
// One node of the Huffman tree, stored in an array and linked by index.
// An index of 0 in parent / lchild / rchild means "no such node".
struct HTnode {
    int weight;  // weight (occurrence count) carried by this node
    int parent;  // array index of the parent node, 0 if none
    int lchild;  // array index of the left child, 0 if none
    int rchild;  // array index of the right child, 0 if none
};

// A Huffman tree is handled as a pointer to its node array.
typedef HTnode *HuffmanTree;
// A character paired with its weight (number of occurrences).
struct CharArrayEntry {
    char ch;     // the character itself
    int weight;  // how many times it occurred
};
typedef CharArrayEntry CharArray;
// 1. Select the two parentless nodes with the smallest weights.
//
// Scans a[1..n] (1-based; a[0] is never examined) and considers only nodes
// whose parent field is 0.
//
// Parameters:
//   a  - node array.
//   n  - highest index to examine.
//   s1 - receives the index of the lightest candidate.
//   s2 - receives the index of the second-lightest candidate (never == s1).
//
// Ties are broken in favour of the lower index. If fewer than two candidates
// exist, the result that cannot be determined is set to 0 (the "no node"
// index) instead of being left unassigned and then used as an array index.
void Select(HTnode a[], int n, int &s1, int &s2) {
    s1 = 0;
    s2 = 0;
    for (int i = 1; i <= n; i++) {
        if (a[i].parent != 0) {
            continue;  // already merged into a subtree
        }
        if (s1 == 0 || a[i].weight < a[s1].weight) {
            // New minimum: the previous minimum becomes the runner-up.
            s2 = s1;
            s1 = i;
        } else if (s2 == 0 || a[i].weight < a[s2].weight) {
            s2 = i;
        }
    }
}
// 2. Build the Huffman tree.
//
// Parameters:
//   HT - receives a newly allocated array of 2*n nodes (new[]); slots 1..n
//        are the leaves, slots n+1..2*n-1 the internal nodes, and the root
//        is slot 2*n-1. Slot 0 is zeroed and otherwise unused. The caller
//        owns the array and must release it with delete[].
//   n  - number of leaves (distinct symbols).
//   x  - the n symbols; only x[i].weight is read here.
//
// HT is always assigned: it is set to NULL when n < 1, and a single-leaf
// tree is produced when n == 1. Previously n <= 1 returned without touching
// HT, leaving callers with an indeterminate pointer.
void CreateHuffmanTree(HuffmanTree & HT, int n, CharArray x[]) {
    if (n < 1) {
        HT = NULL;
        return;
    }
    const int m = 2 * n - 1;  // total number of nodes in the finished tree
    HT = new HTnode[m + 1];   // index 0 is unused so that 0 can mean "none"

    // Start with every node detached and weightless.
    for (int i = 0; i <= m; i++) {
        HT[i].parent = 0;
        HT[i].lchild = 0;
        HT[i].rchild = 0;
        HT[i].weight = 0;
    }
    // Leaves take their weights from the caller's symbol table.
    for (int i = 1; i <= n; i++) {
        HT[i].weight = x[i - 1].weight;
    }

    // Repeatedly merge the two lightest parentless nodes into a new node i.
    for (int i = n + 1; i <= m; i++) {
        int s1 = 0;
        int s2 = 0;
        Select(HT, i - 1, s1, s2);
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        // Giving both nodes a parent removes them from the candidate set.
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }
}
// Prints the Huffman tree as a table, one row per node for HT[1..n].
// Each row shows the index, weight, parent, left child and right child.
// The header also names a "value" column, but no value is printed for it.
// Left alignment is switched on for cout and stays in effect afterwards.
void print(HTnode HT[], int n) {
    std::cout << "index weight parent lchild rchild value" << std::endl;
    std::cout << std::left;
    int row = 1;
    while (row <= n) {
        const HTnode &node = HT[row];
        std::cout << std::setw(5) << row << " "
                  << std::setw(6) << node.weight << " "
                  << std::setw(6) << node.parent << " "
                  << std::setw(6) << node.lchild << " "
                  << std::setw(6) << node.rchild << " "
                  << std::endl;
        ++row;
    }
}
// Builds the Huffman code table by walking from every leaf up to the root.
//
// Parameters:
//   HT - Huffman tree whose leaves occupy slots 1..n.
//   HC - receives a new[]-allocated array of n+1 pointers; HC[i] (1 <= i <= n)
//        is a new[]-allocated, NUL-terminated string of '0'/'1' for leaf i.
//        HC[0] is set to NULL. The caller must delete[] each HC[i] and HC.
//   n  - number of leaves.
//
// HC is set to NULL when n <= 0 or HT is NULL. A leaf that has no parent
// (single-node tree) gets an empty code string.
void CreateHuffmanCode(HuffmanTree HT, HuffmanCode &HC, int n) {
    if (n <= 0 || HT == NULL) {
        // The scratch buffer below needs n >= 1 (it writes cd[n - 1]).
        HC = NULL;
        return;
    }
    HC = new char*[n + 1];
    HC[0] = NULL;  // slot 0 is unused; keep it well-defined

    // Scratch buffer: a code has at most n - 1 bits, plus the terminator.
    char *cd = new char[n];
    cd[n - 1] = '\0';

    for (int i = 1; i <= n; i++) {
        int start = n - 1;      // the code is written backwards from the end
        int c = i;              // current node
        int f = HT[c].parent;   // parent of the current node
        while (f != 0) {        // climb until the root is reached
            --start;
            if (HT[f].lchild == c) {
                cd[start] = '0';
            }
            else if (HT[f].rchild == c) {
                cd[start] = '1';
            }
            c = f;
            f = HT[c].parent;
        }
        HC[i] = new char[n - start];  // code length plus terminator
        strcpy(HC[i], &cd[start]);
    }
    delete[] cd;  // allocated with new[], so it must be released with delete[]
}
int main() {
cout << "Please input the exam string,ends with'#'" << endl;
int data[56];
char ch; int i = 0, sum = 0;
char *CH = new char[1000];
memset(data, 0, sizeof(data));//初始化为0
//分析过输入的字符类型,规定0-25为A-Z,26-51为a-z,52为',' ,53为'.' ,54为'\'' ,55为空格,
//题目给的引号字符是中文的,会影响后面输出,所以我改为英文的操作
//分56个组
while ((ch = getchar()) != '#') {//用#标记结束
if (ch != ' '&&ch != '\n'&&ch != '\0') {
if ((int)ch <= 90 && (int)ch >= 65)
{
data[(int)ch - 65] += 1; sum += 1;
}
else if ((int)ch <= 122 && (int)ch >= 97)
{
data[(int)ch - 71] += 1; sum += 1;
}
else if (ch == ',') {
data[52] += 1; sum += 1;
}
else if (ch == '.')
{
data[53] += 1; sum += 1;
}
else if (ch == '\'')
{
data[54] += 1; sum += 1;
}
CH[i] = ch; i++;
}
else if (ch == ' ') {
data[55] += 1; sum += 1;
}
}
cout << "显示所有统计结果" << endl;
for (i = 0; i <= 55; i++) {
if (0 <= i && i <= 25) {
cout << (char)(i + 65) << "\t" << data[i] << endl;
}
else if (26 <= i && i <= 51) {
cout << (char)(i + 71) << "\t" << data[i] << endl;
}
else if (i == 52) {
ch = ',';
cout << "英文逗号\t" << ch << "\t" << data[i] << endl;
}
else if (i == 53) {
ch = '.';
cout << "英文句号\t" << ch << "\t" << data[i] << endl;
}
else if (i == 54) {
ch = '\'';
cout << "英文文符号\t" << ch << "\t" << data[i] << endl;
}
else if (i == 55) {
ch = 32;
cout << "空格符号\t" << ch << "\t" << data[i] << endl;
}
}
//剔除没有的
CharArray * charArray = new CharArray[56];
int count = 0;
for (i = 0; i <= 55; i++) {
if (data[i] != 0)
{
if (0 <= i && i <= 25) {
ch = (i + 65);
charArray[count].weight = data[i];
charArray[count++].ch = ch;
}
else if (26 <= i && i <= 51) {
ch = (i + 71);
charArray[count].weight = data[i];
charArray[count++].ch = ch;
}
else if (i == 52) {
ch = ',';
charArray[count].weight = data[i];
charArray[count++].ch = ch;
}
else if (i == 53) {
ch = '.';
charArray[count].weight = data[i];
charArray[count++].ch = ch;
}
else if (i == 54) {
ch = '\'';
charArray[count].weight = data[i];
charArray[count++].ch = ch;
}
else if (i == 55) {
ch = 32;//ASCILL=32
charArray[count].weight = data[i];
charArray[count++].ch = ch;
}
}
}
int n = count;//CharArray数组的有效长度
cout << "显示所有出现字符的统计结果" << endl;
cout << "总共出现字符个数" << sum << endl;
cout << "出现字符个数和频率" << endl;
cout << "有效位有" << n << endl;
for (i = 0; i < n; i++)
cout << "ch=" << charArray[i].ch << "\tnumber=" << charArray[i].weight << "\tfrequency=" << (double)charArray[i].weight / sum << endl;
HuffmanTree HT; // 动态创建数组
CreateHuffmanTree(HT, n, charArray);
print(HT, 2 * n - 1);
cout << "生成哈夫曼编码" << endl;
HuffmanCode HC;
CreateHuffmanCode(HT, HC, n);
for (int i = 1; i <= n; i++) {
cout << "ch=" << charArray[i - 1].ch << "\tcode= " << HC[i] << ends<<(int)charArray[i - 1].ch<<endl;
}
delete CH;
delete charArray;
CH = NULL;
charArray = NULL;
return 0;
}
/*
Test case (type '#' after the text to mark the end of the input):
The Chinese official said he viewed the Trump Presidency not as an aberration but as the product of a failing political system. This jibes with other accounts. The Chinese leadership believes that the United States, and Western democracies in general, haven’t risen to the challenge of a globalized economy, which necessitates big changes in production patterns, as well as major upgrades in education and public infrastructure. In Trump and Trumpism, the Chinese see an inevitable backlash to this failure.
*/
// Source: CSDN
// Author: 广大菜鸟
// Link: https://blog.csdn.net/weixin_44001521/article/details/103748773