0

I've been trying to decode from the Huffman tree we were given. So far I have been unsuccessful.

The Huffman2.CPP...

#include <list>
#include <fstream>
#include <list>
#include <string>
#include <cassert>
#include "node.h"
#include <cctype>

// Depth-first traversal of a Huffman tree that records the code of every leaf.
//
// root      - node to start from; a null pointer is ignored.
// code      - the bit string ('0' = went left, '1' = went right) that leads
//             from the tree root to `root`.
// codearray - output table indexed by letter - 'A'; receives `code` for each
//             leaf whose letter lies in 'A'..'Z'.
//
// Leaves whose letter is outside 'A'..'Z' are skipped instead of being written
// past the end of the 26-entry table.
void DFS_traverse(node* root, string code, string(&codearray)[26])
{
    // Nothing to visit (empty tree or a missing child).
    if (root == NULL)
        return;

    if (root->is_leaf())
    {
        // Found a leaf: store its code, but only if the slot exists.
        const int index = root->letter - 'A';
        if (index >= 0 && index < 26)
            codearray[index] = code;
        return;
    }

    // Moving to the left child appends a 0, to the right child a 1.
    // Missing children are handled by the null check at the top.
    DFS_traverse(root->left, code + "0", codearray);
    DFS_traverse(root->right, code + "1", codearray);
}

int main()
{
    //////////////////////   Read Probability Data   /////////////////////

    // Open the probability file for reading
    string probfilename = "probability.txt";
    ifstream probfile;
    probfile.open(probfilename.c_str(), ifstream::in);

    assert(probfile);

    // Read in letters and associated probabilities, create a list of nodes
    list<node*> node_list;
    for (unsigned int n=0; !probfile.eof(); n++)
    {
        char c;
        double p;
        probfile >> c >> p;
        node* newnode = new node(toupper(c), p);
        node_list.push_back(newnode);
    }

    // Close the probability file
    probfile.close();

 ///////////////////   Construct Huffman Coding Tree ////////////////////

    while (node_list.size() > 1) 
    {       

        // sort list.
        node_list.sort(comp_prob);
        // extract and removing the 1st node with lowest probability.
        node* node1 = node_list.front();
        node_list.pop_front();
        // extract and removing the 2nd node with the lowest probability.
        node* node2 = node_list.front();
        node_list.pop_front();
        // merg and insert the new node.
        node* newnode = new node(node1, node2);
        newnode->letter = '\0';
        node_list.push_back(newnode);
    }
    
///////////////////   Generate Huffman Codes ////////////////////////

    // get the root of the huffman tree.
    node* root = node_list.front();
    node_list.pop_front();

    // traverse and generate huffman codes
    string codearray[26];
    DFS_traverse(root, "", codearray);

    // print out the code for each letter
    for (char ch = 'A'; ch <= 'Z'; ++ch)
    {
        cout << ch << "\t" << codearray[static_cast<int>(ch - 'A')] << endl;
    }


/////////////////////////   Encode Input File   ////////////////////////////////

    // Open the text file for reading
    string textfilename = "input.txt";
    ifstream textfile;
    textfile.open(textfilename.c_str(), ifstream::in);

    assert(textfile);

    // Open the file for writing encoded text
    string encodedfilename = "encoded.txt";
    fstream encodedfile;
    encodedfile.open(encodedfilename.c_str(), fstream::out | fstream::in | fstream::trunc);

    assert(encodedfile);

    if (encodedfile.is_open())
    {
        // Input and encode each character from the input file one by one
        // and output them to the output file
        while (!textfile.eof()) 
        {
            // Read the character and convert it to uppercase
            char c;
            textfile.get(c);
            c = toupper(c);
            if (( c >= 'A') && (c <= 'Z'))
            {
                string s;

                if (isalpha(c))
                {
                s += codearray[static_cast<int>(c - 'A')];
                }


                if (!s.empty())
                    encodedfile << s;
                else 
                {
                    cout << "Error: cannot find the code for " << c << endl;
                    return 0;
                }
            }
            else 
                encodedfile << c;
        }
    } 
    else
{
    cout << "Error: output file cannot be opened!\n";
    textfile.close();
    return 0;
}

    // Close the text file
    textfile.close();

//////////////////////// Decode the Encoded File ///////////////////////////

    // Reset the encoded text file for reading
    encodedfile.clear();
    encodedfile.seekg(0, ios::beg);

    // Open the file for writing decoded text
    string decodedfilename = "decoded.txt";
    ofstream decodedfile;
    decodedfile.open(decodedfilename.c_str(), ofstream::out);

    assert(decodedfile);

    if (decodedfile.is_open())
   {
        while (!encodedfile.eof())
        {             
            string s = "";
            char c;
            encodedfile.get(c);
            if ((c != '0') && (c != '1'))
            {

                decodedfile << c;
                continue;
            }

        
        
        // PROBLEM IN THIS REGION
        
        
        }
    }
    else 
    {
        cout << "output decoded file cannot be opened!\n";
        encodedfile.close();
        return 0;
    }

    // Close the input and output files for decoding
    encodedfile.close();
    decodedfile.close();

    //  Please call clean(root), where root is the root node of the huffman tree
    clean(root);

    return 0;
}

The Node.h...

// FILE: node.h 
// CLASS PROVIDED: node
//     Each node on a binarytree has the following features: 
//     letter: a character. If an internal node, the NULL character ('\0') is used.
//     probability: a double value showing the probability of occurrence
//     left: a pointer to the left child
//     right: a pointer to the right child


#ifndef NODE_H
#define NODE_H

#include <iostream> 
using namespace std;

// A node of a binary tree.
//
// A leaf holds a letter and its probability. An internal node holds the null
// character and, when it has both children, the sum of their probabilities.
class node
{
public:
    char letter;         // letter of a leaf; '\0' for an internal node
    double probability;  // probability of occurrence
    std::string code;    // not set by this class; left empty on construction
    node* left;          // left child, or NULL
    node* right;         // right child, or NULL

    // Builds a leaf for letter `c` with probability `p`.
    node(char c, double p)
        : letter(c), probability(p), left(NULL), right(NULL)
    {
    }

    // Builds an internal node with the given children.
    // Every member is initialised: the letter is '\0', and the probability is
    // the children's sum when both exist and 0.0 otherwise.
    node(node* lptr, node* rptr)
        : letter('\0'), probability(0.0), left(lptr), right(rptr)
    {
        if ((lptr != NULL) && (rptr != NULL))
            probability = lptr->probability + rptr->probability;
    }

    // Returns true when the node has no children.
    bool is_leaf() const
    {
        return (left == NULL) && (right == NULL);
    }
};

// NON-MEMBER FUNCTION
// Compares two nodes by probability.
// Purpose: to sort the node list in order of ascending probability.
// Returns true when `first` has a strictly lower probability than `second`.
// Both pointers must be non-null.
//
// Declared inline because it is defined in a header: without it, including
// this header from more than one source file produces duplicate definitions.
inline bool comp_prob(node* first, node* second)
{
    return first->probability < second->probability;
}

// Deletes every node of the tree rooted at `n`, internal nodes included.
// A null pointer is accepted and ignored, so a node with a single child (or
// an empty tree) is handled safely.
//
// Declared inline because it is defined in a header: without it, including
// this header from more than one source file produces duplicate definitions.
inline void clean(node* n)
{
    if (n == NULL)
        return;

    // Release both subtrees first, then the node itself, so no pointer is
    // read after its owner has been deleted.
    clean(n->left);
    clean(n->right);
    delete n;
}

#endif

I am trying to use the Huffman tree to convert the characters I read into the matching character in the tree and print it to the decoded file; otherwise I keep reading characters until a valid code has been read. So far I can only print out the special characters from the file I encoded earlier in the program. As a rule, I cannot edit the node header file.

The encoded file is saved as...

0010100011111000'0101011 11011100 00001111000 1110110111100000001001110101 1110110111001001.....................

But the Decoded file is saved as

' .

       .

' , ' . , , .

' - - .........................it continues on this way.

Instead of decoding into this...

They're on my Linked List.

As some day it may happen that an exam must be sat.
I've got a little list, I've got a linked list.
Of Data Structure topics that should be got down pat,
And that never should be missed, that never should be missed..............

There is more to these pages, but I'm trying to keep this short.

This is Probability.txt

A 0.0805 B 0.0162 C 0.0320 D 0.0365 E 0.1231 F 0.0228 G 0.0161 H 0.0514 I 0.0718 J 0.0010 K 0.0052 L 0.0403 M 0.0225 N 0.0719 O 0.0794 P 0.0229 Q 0.0020 R 0.0603 S 0.0659 T 0.0959 U 0.0310 V 0.0093 W 0.0203 X 0.0020 Y 0.0188 Z 0.0009

Manny
  • 9
  • 2
  • When you step through the program with the debugger that came with your development tools, keep an eye out for where the program does something unexpected. – user4581301 Dec 04 '20 at 06:30
  • Keep an eye on `for (unsigned int n=0; !probfile.eof(); n++)` it looks like a variant of [Why is iostream::eof inside a loop condition (i.e. `while (!stream.eof())`) considered wrong?](https://stackoverflow.com/questions/5605125/why-is-iostreameof-inside-a-loop-condition-i-e-while-stream-eof-cons) – user4581301 Dec 04 '20 at 06:38
  • Don't read in as a `double`. Floating point numbers are approximations and will almost certainly lead to a messed up bit pattern sooner or later. – user4581301 Dec 04 '20 at 06:40
  • I am not allowed to change the node.h file in any way. That was my professors implementation not my own. – Manny Dec 04 '20 at 06:53
  • Do you see a conflict between that and the recommendations in above comments? – Yunnosch Dec 04 '20 at 07:15
  • Can you try a simpler thing first? E.g. output the encoding table? That would also allow to do the decoding with pen and paper and verify the table. – Yunnosch Dec 04 '20 at 07:19
  • @Yunnosch I may be misreading what's supposed to be the input here. probability.txt does sound like the sort of thing that would hold a double value formatted as a text string. I'm probably looking at the wrong file. What is probability.txt supposed to look like? – user4581301 Dec 04 '20 at 07:27
  • Good question. I recommend to make OP clarify that. – Yunnosch Dec 04 '20 at 07:29
  • @user4581301 This is necessary because the probability input is a double. I could use a float but that wouldn't change anything regardless, especially in the decoding scheme. – Manny Dec 04 '20 at 14:44

0 Answers0