I've been trying to decode from the Huffman tree we were given. So far I have been unsuccessful.
The Huffman2.CPP...
#include <list>
#include <fstream>
#include <list>
#include <string>
#include <cassert>
#include "node.h"
#include <cctype>
// Depth-first traversal that records the Huffman code of every leaf reachable
// from `root`.
//   root      - root of the (sub)tree to walk; a null pointer is ignored.
//   code      - bit string accumulated on the path down to `root`
//               ('0' is appended for a left edge, '1' for a right edge).
//   codearray - output table indexed by (letter - 'A'); receives each leaf's code.
// Leaves whose letter is outside 'A'..'Z' are skipped so that they can never
// index outside the 26-entry table.
void DFS_traverse(node* root, string code, string(&codearray)[26])
{
    if (!root)
    {
        // Nothing to visit (empty tree or a missing child).
        return;
    }
    if (root->is_leaf())
    {
        // Found a leaf: store its code, but only when the letter maps to a
        // valid slot of the table.
        const int index = root->letter - 'A';
        if ((index >= 0) && (index < 26))
        {
            codearray[index] = code;
        }
        return;
    }
    // Moving left appends 0, moving right appends 1. A missing child is
    // handled by the null check at the top of the recursive call.
    DFS_traverse(root->left, code + "0", codearray);
    DFS_traverse(root->right, code + "1", codearray);
}
int main()
{
////////////////////// Read Probability Data /////////////////////
// Open the probability file for reading
string probfilename = "probability.txt";
ifstream probfile;
probfile.open(probfilename.c_str(), ifstream::in);
assert(probfile);
// Read in letters and associated probabilities, create a list of nodes
list<node*> node_list;
for (unsigned int n=0; !probfile.eof(); n++)
{
char c;
double p;
probfile >> c >> p;
node* newnode = new node(toupper(c), p);
node_list.push_back(newnode);
}
// Close the probability file
probfile.close();
/////////////////// Construct Huffman Coding Tree ////////////////////
while (node_list.size() > 1)
{
// sort list.
node_list.sort(comp_prob);
// extract and removing the 1st node with lowest probability.
node* node1 = node_list.front();
node_list.pop_front();
// extract and removing the 2nd node with the lowest probability.
node* node2 = node_list.front();
node_list.pop_front();
// merg and insert the new node.
node* newnode = new node(node1, node2);
newnode->letter = '\0';
node_list.push_back(newnode);
}
/////////////////// Generate Huffman Codes ////////////////////////
// get the root of the huffman tree.
node* root = node_list.front();
node_list.pop_front();
// traverse and generate huffman codes
string codearray[26];
DFS_traverse(root, "", codearray);
// print out the code for each letter
for (char ch = 'A'; ch <= 'Z'; ++ch)
{
cout << ch << "\t" << codearray[static_cast<int>(ch - 'A')] << endl;
}
///////////////////////// Encode Input File ////////////////////////////////
// Open the text file for reading
string textfilename = "input.txt";
ifstream textfile;
textfile.open(textfilename.c_str(), ifstream::in);
assert(textfile);
// Open the file for writing encoded text
string encodedfilename = "encoded.txt";
fstream encodedfile;
encodedfile.open(encodedfilename.c_str(), fstream::out | fstream::in | fstream::trunc);
assert(encodedfile);
if (encodedfile.is_open())
{
// Input and encode each character from the input file one by one
// and output them to the output file
while (!textfile.eof())
{
// Read the character and convert it to uppercase
char c;
textfile.get(c);
c = toupper(c);
if (( c >= 'A') && (c <= 'Z'))
{
string s;
if (isalpha(c))
{
s += codearray[static_cast<int>(c - 'A')];
}
if (!s.empty())
encodedfile << s;
else
{
cout << "Error: cannot find the code for " << c << endl;
return 0;
}
}
else
encodedfile << c;
}
}
else
{
cout << "Error: output file cannot be opened!\n";
textfile.close();
return 0;
}
// Close the text file
textfile.close();
//////////////////////// Decode the Encoded File ///////////////////////////
// Reset the encoded text file for reading
encodedfile.clear();
encodedfile.seekg(0, ios::beg);
// Open the file for writing decoded text
string decodedfilename = "decoded.txt";
ofstream decodedfile;
decodedfile.open(decodedfilename.c_str(), ofstream::out);
assert(decodedfile);
if (decodedfile.is_open())
{
while (!encodedfile.eof())
{
string s = "";
char c;
encodedfile.get(c);
if ((c != '0') && (c != '1'))
{
decodedfile << c;
continue;
}
// PROBLEM IN THIS REGION
}
}
else
{
cout << "output decoded file cannot be opened!\n";
encodedfile.close();
return 0;
}
// Close the input and output files for decoding
encodedfile.close();
decodedfile.close();
// Please call clean(root), where root is the root node of the huffman tree
clean(root);
return 0;
}
The Node.h...
// FILE: node.h
// CLASS PROVIDED: node
// Each node in a binary tree has the following features:
// letter: a character. If an internal node, the NULL character ('\0') is used.
// probability: a double value showing the probability of occurrence
// left: a pointer to the left child
// right: a pointer to the right child
#ifndef NODE_H
#define NODE_H
#include <iostream>
using namespace std;
// A node of a binary (Huffman) tree.
// A leaf carries a letter and its probability; an internal node carries the
// NULL character ('\0') and the combined probability of its children.
// The class does not own its children: it has no destructor, so the tree has
// to be released explicitly by the caller.
class node
{
public:
    char letter;         // symbol stored in a leaf; '\0' for an internal node
    double probability;  // probability of occurrence
    std::string code;    // default-constructed (empty); never written by this class
    node* left;          // left child, or NULL
    node* right;         // right child, or NULL

    // Builds a leaf for letter `c` with probability `p`.
    node(char c, double p)
        : letter(c), probability(p), code(), left(NULL), right(NULL)
    {
    }

    // Builds an internal node above `lptr` and `rptr`.
    // The letter is always '\0' and the probability is the sum of the
    // probabilities of the children that are present, so every member is
    // initialized even when a child is NULL.
    node(node* lptr, node* rptr)
        : letter('\0'), probability(0.0), code(), left(lptr), right(rptr)
    {
        if (lptr != NULL)
            probability += lptr->probability;
        if (rptr != NULL)
            probability += rptr->probability;
    }

    // Returns true when the node has no children.
    bool is_leaf() const
    {
        return (left == NULL) && (right == NULL);
    }
};
// NON-MEMBER FUNCTION
// Ordering predicate on node probability.
// purpose: to sort the node list in the order of ascending probability
// Returns true when `first` has a strictly lower probability than `second`.
// Both pointers must be non-null.
// Declared inline because the definition lives in a header; without it,
// including this header from more than one source file produces
// multiple-definition link errors.
inline bool comp_prob(node* first, node* second)
{
    return first->probability < second->probability;
}
// Clean all nodes in the tree rooted at `n`.
// Children are released before their parent (post-order), so every node,
// internal nodes included, is deleted exactly once. A null pointer is
// accepted and ignored, which also covers nodes that have only one child.
// After the call, `n` and every pointer into the tree are dangling.
// Declared inline because the definition lives in a header.
inline void clean(node* n)
{
    if (n == NULL)
    {
        return;
    }
    clean(n->left);
    clean(n->right);
    delete n;
}
#endif
I am trying to use the Huffman tree to convert the characters read from the encoded file into the matching letters and print them to the decoded file; if the bits read so far do not form a valid code, I keep reading characters until they do. So far I can only print out the special characters from the file I encoded earlier in the program. As a rule, I cannot edit the node header file.
The encoded file is saved as...
0010100011111000'0101011 11011100 00001111000 1110110111100000001001110101 1110110111001001.....................
But the Decoded file is saved as
' .
.
' , ' . , , .
' - - .........................it continues on this way.
Instead of decoding into this...
They're on my Linked List.
As some day it may happen that an exam must be sat.
I've got a little list, I've got a linked list.
Of Data Structure topics that should be got down pat,
And that never should be missed, that never should be missed..............
There is more to these pages, but I'm trying to keep this short.
This is Probability.txt
A 0.0805 B 0.0162 C 0.0320 D 0.0365 E 0.1231 F 0.0228 G 0.0161 H 0.0514 I 0.0718 J 0.0010 K 0.0052 L 0.0403 M 0.0225 N 0.0719 O 0.0794 P 0.0229 Q 0.0020 R 0.0603 S 0.0659 T 0.0959 U 0.0310 V 0.0093 W 0.0203 X 0.0020 Y 0.0188 Z 0.0009