4

What I understand already

I understand that median of medians algorithm(I will denote as MoM) is a high constant factor O(N) algorithm. It finds the medians of k-groups(usually 5) and uses them as the next iteration's sets to find medians of. The pivot after finding this will be between 3/10n and 7/10n of the original set, where n is the number of iterations it took to find the one median base case.

I keep getting a segmentation fault when I run this code for MoM, but I'm not sure why. I've debugged it and believe that the issue lies with the fact that I'm calling medianOfMedian(medians, 0, medians.size()-1, medians.size()/2);. However, I thought that this was logically sound since we were supposed to recursively find the median by calling itself. Perhaps my base case isn't correct? In a tutorial by YogiBearian on youtube(a stanford professor, link: https://www.youtube.com/watch?v=YU1HfMiJzwg ), he did not state any extra base case to take care of the O(N/5) operation of recursion in MoM.

Complete Code

Note: Per suggestions, I have added a base case and used .at() function by vectors.

static const int GROUP_SIZE = 5;
/* Helper function for m of m. This function divides the array into chunks of 5 
 * and finds the median of each group and puts it into a vector to return.
 * The last group will be sorted and the median will be found despite its uneven size.
 */
vector<int> findMedians(vector<int>& vec, int start, int end){
    vector<int> medians;
    for(int i = start; i <= end; i+= GROUP_SIZE){
        std::sort(vec.begin()+i, min(vec.begin()+i+GROUP_SIZE, vec.end()));
        medians.push_back(vec.at(min(i + (GROUP_SIZE/2), (i + end)/2)));
    }
    return medians;
}

/* Job is to partition the array into chunks of 5(subject to change via const)
 * And then find the median of them. Do this recursively using select as well.
 */
int medianOfMedian(vector<int>& vec, int start, int end, int k){
    /* Acquire the medians of the 5-groups */
    vector<int> medians = findMedians(vec, start, end);

    /* Find the median of this */
    int pivotVal;
    if(medians.size() == 1)
        pivotVal = medians.at(0);
    else
        pivotVal = medianOfMedian(medians, 0, medians.size()-1, medians.size()/2);

    /* Stealing a page from select() ... */
    int pivot = partitionHelper(vec, pivotVal, start, end);

    cout << "After pivoting with the value " << pivot << " we get : " << endl;
    for(int i = start; i < end; i++){
        cout << vec.at(i) << ", ";
    }
    cout << "\n\n" << endl;
    usleep(10000);
    int length = pivot - start + 1;
    if(k < length){
        return medianOfMedian(vec, k, start, pivot-1);
    }
    else if(k == length){
        return vec[k];
    }
    else{
        return medianOfMedian(vec, k-length, pivot+1, end);
    }

}

Some extra functions for helping unit test

Here are some unit tests that I wrote for these 2 functions. Hopefully they help.

vector<int> initialize(int size, int mod){
    int arr[size];
    for(int i = 0; i < size; i++){
    arr[i] = rand() % mod;
    }
    vector<int> vec(arr, arr+size);
    return vec;
}

/* Unit test for findMedians */
void testFindMedians(){
    const int SIZE = 36;
    const int MOD = 20;
    vector<int> vec = initialize(SIZE, MOD);
    for(int i = 0; i < SIZE; i++){
        cout << vec[i] << ", ";
    }
    cout << "\n\n" << endl;

    vector<int> medians = findMedians(vec, 0, SIZE-1);

    cout << "The 5-sorted version: " << endl;
    for(int i = 0; i < SIZE; i++){
        cout << vec[i] << ", ";
    }
    cout << "\n\n" << endl;

    cout << "The medians extracted: " << endl;
    for(int i = 0; i < medians.size(); i++){
        cout << medians[i] << ", ";
    }
    cout << "\n\n" << endl;
}

/* Unit test for medianOfMedian */
void testMedianOfMedian(){
    const int SIZE = 30;
    const int MOD = 70;
    vector<int> vec = initialize(SIZE, MOD);
    cout << "Given array : " << endl;
    for(int i = 0; i < SIZE; i++){
        cout << vec[i] << ", ";
    }
    cout << "\n\n" << endl;
    int median = medianOfMedian(vec, 0, vec.size()-1, vec.size()/2); 
    cout << "\n\nThe median is : " << median << endl;

    cout << "As opposed to sorting and then showing the median... : " << endl;
    std::sort(vec.begin(), vec.end());
    cout << "sorted array : " << endl;
    for(int i = 0; i < SIZE; i++){
        if(i == SIZE/2)
            cout << "**";
        cout << vec[i] << ", ";
    }
    cout << "Median : " << vec[SIZE/2] << endl;
}

Extra section about the output that I'm getting

Given array :
7, 49, 23, 48, 20, 62, 44, 8, 43, 29, 20, 65, 42, 62, 7, 33, 37, 39, 60, 52, 53, 19, 29, 7, 50, 3, 69, 58, 56, 65,

After pivoting with the value 5 we get :
23, 29, 39, 42, 43,

After pivoting with the value 0 we get :
39,

Segmentation Fault: 11

It seems all right and dandy until the segmentation fault. I'm confident that my partition function works as well(was one of the implementations for the leetcode question).

Disclaimer: This is not a homework problem, but rather my own curiosity about the algorithm after I used quickSelect in a leetcode problem set.

Please let me know if my question proposed requires more elaboration for MVCE, thanks!

EDIT: I figured out that the recursion partition scheme is wrong in my code. As Pradhan has pointed out - I somehow have empty vectors which lead to the start and end being 0 and -1 respectively, causing me to have segmentation fault from an infinite loop of calling it. Still trying to figure this part out.

Andreas Wenzel
  • 4,984
  • 2
  • 7
  • 24
OneRaynyDay
  • 2,990
  • 1
  • 14
  • 39
  • 1
    Please replace your usage of `[ ]` to access a vector's elements with `vector::at()`. Why? To ensure you did not go out-of-bounds when accessing your vector items. If you go out-of-bounds, the `at()` will throw an exception, giving you more information. If you just use `[ ]`, the behavior of the code is undefined if you go out-of-bounds. There has been several posts on SO concerning vectors and seg faults, and almost always the solution can be discovered by using `at()` to identify boundary access issues. – PaulMcKenzie Jun 19 '16 at 00:18
  • @PaulMcKenzie Ah - thank you! I was not aware of this functionality. I will use it right now and resume debugging and report back with results :) – OneRaynyDay Jun 19 '16 at 00:19
  • I think `min(vec.begin()+i+GROUP_SIZE, vec.end())` should be `min(vec.begin()+i+GROUP_SIZE, vec.begin()+end)` – Sergey Kalinichenko Jun 19 '16 at 00:29
  • @PaulMcKenzie Edited, and @dasblinkenlight I changed it but it did not affect the output. I believe the iterator arithmetic behaves the same with `vec.begin()+end` and `vec.end()`. – OneRaynyDay Jun 19 '16 at 00:31

2 Answers2

3

MoM always calls itself (to compute pivot), and thus exhibits infinite recursion. This violates the "prime directive" of recursive algorithms: at some point, the problem is "small" enough to not need a recursive call.

Scott Hunter
  • 44,196
  • 8
  • 51
  • 88
  • Hi - good catch! However, it didn't fix my segmentation fault. (Also, an infinite loop would be pretty obvious in the display but a segmentation fault wouldn't be created by infinite loops right?) It's edited now, with an added "logical" base case. I'm hoping that I'm going the right way. – OneRaynyDay Jun 19 '16 at 00:28
  • 1
    An infinite recursion would give you a segfault when allowed stack size limits are exceeded. – Pradhan Jun 19 '16 at 02:06
  • @OneRaynyDay could you check if findMedians ever ends up with end < start? – Pradhan Jun 19 '16 at 02:26
  • @Pradhan Aha - you're right, I think it does result in an infinite recursion silently because end < start, and throws a segfault. What made you come to those conclusions? – OneRaynyDay Jun 19 '16 at 03:51
  • @OneRaynyDay that's the only path I saw to an infinite recursion in your code :) Since you had eliminated out-of-bounds accesses, this seemed the most likely cause. – Pradhan Jun 19 '16 at 04:03
  • @Pradhan I see. Thank you and Scott for not only helping me find the error but also helping me learn how to deduce bugs in these situations! I will fix the error and then approve of this answer(given that there are no other bugs left) – OneRaynyDay Jun 19 '16 at 04:15
  • Let us [continue this discussion in chat](http://chat.stackoverflow.com/rooms/115027/discussion-between-oneraynyday-and-pradhan). – OneRaynyDay Jun 19 '16 at 04:35
2

Correct Implementation

With the help of Scott's hint, I was able to give a correct implementation of this median of medians algorithm. I fixed it and realized that the main idea that I had was correct, but there were a couple errors:

  • My base case should be for subvectors in the size of <=5.

  • There were some small subtleties about whether the last number(variable end), in this case should be considered to be included or as the upper bound less than. In this implementation below I made it the upper bound less than definition.

Here it is below. I also accepted Scott's answer - thank you Scott!

/* In case someone wants to pass in the pivValue, I broke partition into 2 pieces.
 */
int pivot(vector<int>& vec, int pivot, int start, int end){

    /* Now we need to go into the array with a starting left and right value. */
    int left = start, right = end-1;
    while(left < right){
        /* Increase the left and the right values until inappropriate value comes */
        while(vec.at(left) < pivot && left <= right) left++;
        while(vec.at(right) > pivot && right >= left) right--;

        /* In case of duplicate values, we must take care of this special case. */
        if(left >= right) break;
        else if(vec.at(left) == vec.at(right)){ left++; continue; }

        /* Do the normal swapping */
        int temp = vec.at(left);
        vec.at(left) = vec.at(right);
        vec.at(right) = temp;
    }
    return right;
}


/* Returns the k-th element of this array. */
int MoM(vector<int>& vec, int k, int start, int end){
    /* Start by base case: Sort if less than 10 size
     * E.x.: Size = 9, 9 - 0 = 9.
     */
    if(end-start < 10){
        sort(vec.begin()+start, vec.begin()+end);
        return vec.at(k);
    }

    vector<int> medians;
    /* Now sort every consecutive 5 */
    for(int i = start; i < end; i+=5){
        if(end - i < 10){
            sort(vec.begin()+i, vec.begin()+end);
            medians.push_back(vec.at((i+end)/2));
        }
        else{
            sort(vec.begin()+i, vec.begin()+i+5);
            medians.push_back(vec.at(i+2));
        }
    }

    int median = MoM(medians, medians.size()/2, 0, medians.size());

    /* use the median to pivot around */
    int piv = pivot(vec, median, start, end);
    int length = piv - start+1;

    if(k < length){
        return MoM(vec, k, start, piv);
    }
    else if(k > length){
        return MoM(vec, k-length, piv+1, end);
    }
    else
        return vec[k];
}
OneRaynyDay
  • 2,990
  • 1
  • 14
  • 39