0

So I've posted about this recently, but I'm still at a loss for what is going wrong. Specifically, I can't seem to figure out what's causing my AVL Tree to take so long to sort. I read in a file of 500,000 random, unsorted numbers to sort by using a vector in a for loop to feed the tree the numbers one at a time. Now, I've also tested using a normal BST, as someone mentioned that having to create so many nodes one at a time might be why it's taking so long, but that completed in only 5 seconds, with only 12,164 nodes skipped due to being duplicates. My AVL Tree is taking upwards of 3 hours just to sort half the list, so something must be going wrong. Can anyone figure out what it is? As far as I know, the rebalancing and insertion logic is correct, because whenever I ran a bunch of test cases on it they all came out fine. I can't seem to track down where the problem is. Here's my full code for anyone that wants to check it out. Main is kind of a mess right now because of all the stuff I've included for testing purposes (like the tracking loop), but most of that will be gone in the final version.

EDIT:

This question has been answered.

#include <iostream>
#include<iomanip>
#include <time.h>
#include <vector>
#include <fstream>
using namespace std;

vector<int> numbers;

struct node
{
public:
    int data, height;
    node *leftChild, *rightChild;
};

node* root = NULL;

int findMin(node *p) // finds the smallest node in the tree
{
    while (p->leftChild != NULL)
        p = p->leftChild;
    return p->data;
}
int findMax(node *p) // finds the largest node in the tree
{
    while(p->rightChild != NULL)
        p = p->rightChild;
    return p->data;
}
int max(int a, int b) // gets the max of two integers
{
    if(a > b)
        return a;
    else
        return b;
}
int height(node *p) // gets the height of the tree
{
    if(p == NULL)
        return -1;
    else
    {
        p->height = max(height(p->leftChild), height(p->rightChild)) + 1;
    }
    return p->height;
}
node* newNode(int element) // helper function to return a new node with empty subtrees
{
    node* newPtr = new node;
    newPtr->data = element;
    newPtr->leftChild = NULL;
    newPtr->rightChild = NULL;
    newPtr->height = 1;
    return newPtr;
}
node* rightRotate(node* p) // function to right rotate a tree rooted at p
{
    node* child = p->leftChild; // rotate the tree
    p->leftChild = child->rightChild;
    child->rightChild = p;

    // update the height for the nodes
    p->height = height(p);
    child->height = height(child);
    // return new root
    return child;

}
node* leftRotate(node* p) // function to left rotate a tree rooted at p
{
    node* child = p->rightChild; // perform the rotation
    p->rightChild = child->leftChild;
    child->leftChild = p;

    // update the heights for the nodes
    p->height = height(p);
    child->height = height(child);

    // return new root
    return child;
}

int getBalance(node *p)
{
    if(p == NULL)
        return 0;
    else
        return height(p->leftChild) - height(p->rightChild);
}
// recursive version of BST insert to insert the element in a sub tree rooted with root
// which returns new root of subtree
node* insert(node*& p, int element)
{
    // perform the normal BST insertion
    if(p == NULL) // if the tree is empty
        return(newNode(element));
    if(element < p->data)
    {
        p->leftChild = insert(p->leftChild, element);
    }
    else
    {
        p->rightChild = insert(p->rightChild, element);
    }

    // update the height for this node
    p->height = height(p);

    // get the balance factor to see if the tree is unbalanced
    int balance = getBalance(p);

    // the tree is unbalanced, there are 4 different types of rotation to make

    // Single Right Rotation (Left Left Case)
    if(balance > 1 && element < p->leftChild->data)
    {
        return rightRotate(p);
    }
    // Single Left Rotation (Right Right Case)
    if(balance < -1 && element > p->rightChild->data)
    {
        return leftRotate(p);
    }
    // Left Right Rotation (double left rotation)
    if(balance > 1 && element > p->leftChild->data)
    {
        p->leftChild = leftRotate(p->leftChild);
        return rightRotate(p);
    }
    // Right Left Rotation
    if(balance < -1 && element < p->rightChild->data)
    {
        p->rightChild = rightRotate(p->rightChild);
        return leftRotate(p);
    }
    // cout << "Height: " << n->height << endl;
    // return the unmodified root pointer in the case that the tree does not become unbalanced
    return p;
}
void inorder(node *p)
{
    if(p != NULL)
    {
        inorder(p->leftChild);
        cout << p->data << ", ";
        inorder(p->rightChild);
    }
}
void preorder(node *p)
{
    if(p != NULL)
    {
        cout << p->data << ", ";
        preorder(p->leftChild);
        preorder(p->rightChild);
    }
}

void print(node* root)
{
    /*cout << "Min Value: " << findMin(root) << endl;
    cout << "Max Value: " << findMax(root) << endl;
    cout << "Pre Order: ";
    preorder(root); */
    cout << endl << "Inorder: ";
    inorder(root);
    cout << endl << endl << endl << endl;

}

void read()
{
    int num;
    ifstream file_save("data.txt");
    if(file_save.is_open())
    {
        while(!file_save.eof())
        {
            file_save >> num;
            numbers.push_back(num);
        }
        file_save.close();
    }
    else
    {
        cout << "Error in opening file!!" << endl;
    }
}

int main()
{
    double duration;
    time_t begin = time(0);

    read();
    int x = 0;
    int track = 0;
    for (std::vector<int>::const_iterator i = numbers.begin(); i != numbers.begin() + 100000; ++i)
    {
        root = insert(root, numbers[x]);
        x++;
        track++;
        if( (track % 10000) == 0)
        {
            cout << track << " iterations" << endl;
            time_t now = time(0);
            cout << now - begin << " seconds" << endl;
        }

    }
    time_t end = time(0);
    duration = end - begin;
    // print(root);
    cout << "The algorithm took " << duration << " seconds to complete." << endl;
    return 0;
}
Sean Madson
  • 41
  • 1
  • 6
  • Your program segfaults rather quickly, not sure why you think it takes a long time to run. Perhaps creating a tree of height 160-something when you have a data file with 10 numbers has something to do with it. Also [while(eof) is wrong](http://stackoverflow.com/questions/5605125/why-is-iostreameof-inside-a-loop-condition-considered-wrong) – n. 'pronouns' m. Apr 24 '17 at 05:05
  • I'm not sure I follow what you mean, could you elaborate? – Sean Madson Apr 24 '17 at 05:28
  • I have tried your program. It crashes when the input file has about 10 numbers. I have also ran it in a debugger and observed that the tree it creates is invalid. Now launch your own debugger and start debugging. – n. 'pronouns' m. Apr 24 '17 at 05:50
  • I'm not sure why you say that, because neither of those things are true for me. Also, how is the tree invalid? Care to provide specifics, because I've debugged this more than a few times, and I haven't noticed it creating an invalid tree. – Sean Madson Apr 24 '17 at 08:13
  • Does [this](https://pastebin.com/tLvi2ZxM) look like a valid AVL tree to you? It surely doesn't to me. If you have verified that your AVL tree is valid, whaat data file was it verified on? – n. 'pronouns' m. Apr 24 '17 at 08:58
  • So, here's what I get from running the following code int array[] = {56, 35, 239, 64, 843, 54, 65, 786, 59, 20}; for(int i = 0; i < 10; i++) { root = insert(root, array[i]); } Pre Order: 56, 35, 20, 54, 239, 64, 59, 65, 843, 786, Inorder: 20, 35, 54, 56, 59, 64, 65, 239, 786, 843, – Sean Madson Apr 24 '17 at 09:16
  • `int array[] = { ... ` This is not code you have posted. You have a `read` function and `i != numbers.begin() + 100000;` which makes your posted code expect at least 100000 numbers in the file. – n. 'pronouns' m. Apr 24 '17 at 10:01

2 Answers2

5

There are many problems with this code.

  1. while(eof) is wrong.
  2. The main loop expects exactly 100000 elements.
  3. All key comparisons are exact (<, >). There are no rotations performed when a duplicate element is inserted. Thus a tree of identical elements will not be balanced at all.
  4. The height of an empty tree is hardcoded to -1, but the height of a single-node three is initially set to 1, thus violating the invariant height(node) = 1+max(height(node->leftChild))+height(node->rightChild)).
  5. height traverses the entire tree every time it is called, thus making insertion O(n).
Community
  • 1
  • 1
n. 'pronouns' m.
  • 95,181
  • 13
  • 111
  • 206
  • My main question has been answered, however every time I try using while(file_save >> num) instead of while(!file_save.eof()), I wind up losing half my data. – Sean Madson Apr 24 '17 at 10:47
  • You may want to ask a separate question about that, though my guess is that you have not removed `file_save >> num` from your loop *body*. – n. 'pronouns' m. Apr 24 '17 at 11:00
0

So, it seems to me that the reason that it was taking so long was because of too many recursive calls all over the place. This modified code has less recursive calls and thus bogs down the CPU with less stacks to have to process. At least, that's what I'm getting out of this.

void newHeight(node* p)
{
    double leftHeight = height(p->leftChild);
    double rightHeight = height(p->rightChild);
    if(leftHeight > rightHeight)
        p->height = leftHeight;
    else
        p->height = rightHeight;
}

node* rotateright(node* p) // the right rotation round p
{
    node* q = p->leftChild;
    p->leftChild = q->rightChild;
    q->rightChild = p;
    newHeight(p);
    newHeight(q);
    return q;
}

node* rotateleft(node* q) // the left rotation round q
{
    node* p = q->rightChild;
    q->rightChild = p->leftChild;
    p->leftChild = q;
    newHeight(q);
    newHeight(p);
    return p;
}

node* rebalance(node* p) // p node balance
{
    newHeight(p);
    if( getBalance(p)==2 )
    {
        if( getBalance(p->rightChild) < 0 )
            p->rightChild = rotateright(p->rightChild);
        return rotateleft(p);
    }
    if (getBalance(p)==-2 )
    {
        if( getBalance(p->leftChild) > 0  )
            p->leftChild = rotateleft(p->leftChild);
        return rotateright(p);
    }
    return p; // no balance needed
}

node* insert(node* p, int element) // k key insertion in the tree with p root
{
    if(!p) return newNode(element);
    if(element < p->data)
        p->leftChild = insert(p->leftChild, element);
    else
        p->rightChild = insert(p->rightChild, element);
    return rebalance(p);
}
Sean Madson
  • 41
  • 1
  • 6