1

I need to create a word matcher which counts how many times a specific word is mentioned in a text file. Here is what I have done so far; I am not sure what I've done wrong. One text file contains a long paragraph, the other just contains a few words. I need to compare both text files, e.g. the word "and" is in the short text file. I need to compare this with the long paragraph, see how many times this word appears, and then have a report at the end of the program which displays this.

E.g. and - 6 times, but - 0 times, it - 23 times.

^^ Something like this. I am not sure how to start making this.

#include <iostream>
#include <fstream>
#include <string>
using namespace std;
// Word matcher: reads the whitespace-separated words of "text1.txt" and of
// "banned.txt", then reports how many times each banned word occurs in the
// text. Words are compared exactly (case and attached punctuation matter).
// At most 250 words are kept from each file; further words are ignored.
// Returns 0 on success, 1 if either file cannot be opened.
int main()
{
    const int maxWords = 250;

    ifstream infile("text1.txt");
    if(!infile)
    {
        cout << "Error";
        return 1;   // nothing to count without the text
    }
    string words[maxWords];
    int wordcount = 0;
    // Loop on the extraction itself instead of eof(): an eof() test only
    // turns true after a read has already failed, which would store one
    // bogus entry. The bound check keeps the fixed-size array from overflowing.
    while (wordcount < maxWords && infile >> words[wordcount])
    {
        wordcount++;
    }

    ifstream infile2("banned.txt");
    if(!infile2)
    {
        cout << "Error";
        return 1;   // nothing to look for without the banned list
    }
    string bannedwords[maxWords];
    int bannedcount = 0;
    // The banned words go into their own array; writing them into words[]
    // would overwrite the text and leave bannedwords[] empty.
    while (bannedcount < maxWords && infile2 >> bannedwords[bannedcount])
    {
        bannedcount++;
    }

    // found[j] is the number of times bannedwords[j] appears in the text.
    int found[maxWords] = {0};
    for(int j = 0; j < bannedcount; j++)
    {
        for(int i = 0; i < wordcount; i++)
        {
            if (words[i] == bannedwords[j])
            {
                found[j]++;
            }
        }
    }

    // Final report: one line per banned word, including words found 0 times.
    for(int j = 0; j < bannedcount; j++)
    {
        cout << bannedwords[j] << " was found " << found[j] << " times" << endl;
    }

    // Runs the shell's "pause" command (a Windows command) so the console
    // window stays open until a key is pressed.
    system("pause");
    return 0;
}
Brad Larson
  • 168,330
  • 45
  • 388
  • 563

2 Answers2

0

Why not use a std::multiset?

// Load every whitespace-separated word of text1.txt into a multiset, so that
// words.count(w) later yields the number of times w occurs in the text.
ifstream infile("text1.txt");
if(!infile)
{
    cout << "Error";
}
std::multiset<string> words;
string tmp;
// Loop on the extraction itself: eof() only becomes true after a read has
// already failed, so a `while (!infile.eof())` loop would insert one extra,
// bogus entry and inflate a count.
while (infile >> tmp)
{
    words.insert(tmp);
}

Then also use a map for the banned words:

// Load the banned words as the keys of a map; the mapped int is the
// occurrence count, starting at 0 and filled in by the counting loop.
ifstream infile2("banned.txt");
if(!infile2)
{
    cout << "Error";
}
std::map<string, int> bannedwords;
string bannedword;
// Loop on the extraction itself so that a failed final read is not stored.
while (infile2 >> bannedword)
{
    // map::insert needs a key/value pair; operator[] creates the entry and
    // the assignment sets its count to 0.
    bannedwords[bannedword] = 0;
}

Then you can use std::multiset::count(string) to find the words without all the extra looping. You would only need one loop to go through your banned words list, e.g.:

std::map<string, int>::iterator bannedwordIter = bannedwords.begin();
for( ; bannedwordIter != bannedwords.end(); ++bannedwordIter )
{
  bannedwordIter->second = words.count(bannedwordIter->first);

  // you could print here as you process, or have another loop that prints it all after you finish
  cout << bannedwordIter->first << " - " << bannedwordIter->second << " times." << endl;
}
AllenKll
  • 754
  • 2
  • 9
  • 18
0

A minimal way would be to use regular expressions, like so

#include <iostream>
#include <fstream>
#include <string>
#include <regex>

using namespace std;

// Returns a copy of `literal` in which every character that has a special
// meaning in an ECMAScript regular expression is preceded by a backslash,
// so the resulting pattern matches `literal` character for character.
static std::string escapeRegexLiteral(std::string const &literal)
{
    static std::string const metacharacters("\\^$.|?*+()[]{}");
    std::string escaped;
    escaped.reserve(literal.size() * 2);
    for (char const c : literal) {
        if (metacharacters.find(c) != std::string::npos) {
            escaped += '\\';
        }
        escaped += c;
    }
    return escaped;
}

// Counts the non-overlapping occurrences of `word` in the text read from
// `is`, line by line until extraction fails (normally end of stream).
//
// `word` is matched literally: regex metacharacters in it are escaped, so
// "a.b" matches only "a.b" and inputs such as "(" or "c++" no longer make
// the std::regex constructor throw. Occurrences inside longer words are
// counted too ("or" is found in "word"). An empty `word` yields 0 and
// leaves the stream untouched.
unsigned countMatches(std::istream &is, std::string const &word)
{
    if (word.empty()) {
        return 0;   // an empty pattern would match at every position
    }

    std::string text;
    unsigned count(0);
    std::regex const expression(escapeRegexLiteral(word));
    while (std::getline(is, text)) {
        // A default-constructed sregex_iterator is the end-of-sequence
        // iterator, so the distance is the number of matches on this line.
        count += static_cast<unsigned>(std::distance(
            std::sregex_iterator(text.begin(), text.end(), expression),
            std::sregex_iterator()));
    }
    return count;
}

So you just pass it the input stream (in your case an input file stream) and it counts the occurrences of the specified word, after creating a regular expression that matches that word.

int main()
{
    ifstream ifs;
    ifs.open("example_text_file.txt");
    cout << countMatches(ifs, "word_you_want_to_search_for") << endl;
    return 0;
}
Nikos Athanasiou
  • 24,831
  • 11
  • 72
  • 136