Not so easy as initially thought.
And, of course, there are many possible solutions. I will show you 3 solutions with increasing complexity, but all of them use the same algorithm.
- We read the complete text file into a std::string
- We replace all newlines '\n' with a space ' '
- We replace all dot space ". " sequences with ".\n"
- We split the complete text into sentences using the delimiter "."
Please note that the 3rd solution does not need any loop and is using modern C++ language elements and algorithms. But, no need to explain it here, because nobody will use it.
Please see:
#include <algorithm>
#include <iostream>
#include <iterator>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
// Test data: an in-memory stream that stands in for the text file.
// Note that the sentences in it continue across line breaks.
std::istringstream fv{ R"(AMBITIONI DEDISSE SCRIPSISSE IUDICARETUR. ARAS
MATTIS IUDICIUM PURUS SIT AMET FERMENTUM. AONEC
SED ODIO OPERAE, EU VULPUTATE FELIS RHONCUS.
)" };
// Selects which of the three implementations below gets compiled: 1, 2 or 3
#define COMPLEXITY 1
#if COMPLEXITY == 1
/// Replaces every occurrence of `search` in `text` with `replace`, in place.
///
/// The text is scanned once from left to right. Text that was inserted by a
/// replacement is never examined again, so the function also terminates when
/// `replace` itself contains `search` (e.g. replacing "." by "..").
///
/// @param text     String to modify.
/// @param search   Substring to look for. If empty, `text` is left unchanged.
/// @param replace  Substring to insert for each occurrence.
/// @note `search` and `replace` must not refer to `text` itself.
void replace(std::string& text, const std::string& search, const std::string& replace) {
    // An empty search string would match at every position and never advance
    if (search.empty()) {
        return;
    }
    // Search, if the search string is in the text at all
    std::string::size_type pos = text.find(search);
    // Could we find it?
    while (pos != std::string::npos) {
        // We found it. Replace found text
        text.replace(pos, search.length(), replace);
        // Continue behind the inserted text; restarting at pos would find the
        // search string again inside the replacement and loop forever
        pos = text.find(search, pos + replace.length());
    }
}
/// Variant 1: reads the text from `fv`, reflows it so that a line break
/// follows every ". ", and prints the resulting lines one by one.
int main() {
    // Collect the whole content of the text file, character by character
    std::string content;
    for (char ch; fv.get(ch); ) {
        content += ch;
    }

    // Join the lines: every line break becomes a blank
    replace(content, "\n", " ");
    // Break the text again behind every dot that is followed by a blank
    replace(content, ". ", ".\n");

    // Split the reflowed text at its line breaks; getline needs a stream for that
    std::vector<std::string> sentences;
    std::istringstream lineSource(content);
    for (std::string sentence; std::getline(lineSource, sentence); ) {
        sentences.push_back(sentence);
    }

    // Print one sentence per line
    for (const auto& sentence : sentences) {
        std::cout << sentence << "\n";
    }
    return 0;
}
#elif COMPLEXITY == 2
/// Replaces every occurrence of `search` in `text` with `replace`, in place.
///
/// Text inserted by a replacement is never examined again, so the function
/// also terminates when `replace` itself contains `search`.
///
/// @param text     String to modify.
/// @param search   Substring to look for. If empty, `text` is left unchanged.
/// @param replace  Substring to insert for each occurrence.
/// @return Reference to `text`, so that calls can be nested.
/// @note `search` and `replace` must not refer to `text` itself.
std::string& replace(std::string& text, const std::string& search, const std::string& replace) {
    // An empty search string would match at every position and never advance
    if (!search.empty()) {
        // Resume behind the inserted text; restarting at pos would find the
        // search string again inside the replacement and loop forever
        for (std::string::size_type pos{ text.find(search) }; pos != std::string::npos;
             pos = text.find(search, pos + replace.length())) {
            text.replace(pos, search.length(), replace);
        }
    }
    return text;
}
/// Variant 2: reads the text from `fv`, reflows it so that a line break
/// follows every ". ", and prints the resulting lines one by one.
int main() {
    // Accumulates the entire content of the text file
    std::string content;
    // Pull the characters out of the stream one by one until it is exhausted
    char ch{};
    while (fv.get(ch)) {
        content.push_back(ch);
    }

    // First turn every line break into a blank, then every ". " into ".\n"
    std::string& joined = replace(content, "\n", " ");
    replace(joined, ". ", ".\n");

    // getline extracts from a stream, so wrap the reflowed text in one
    std::istringstream lineSource(content);
    // Every extracted line is stored as one sentence
    std::vector<std::string> sentences;
    std::string sentence;
    while (std::getline(lineSource, sentence)) {
        sentences.push_back(sentence);
    }

    // Show output
    for (const auto& s : sentences) {
        std::cout << s << "\n";
    }
    return 0;
}
#elif COMPLEXITY == 3
// Matches a literal dot that is followed by one space
std::regex dotSpace(R"(\. )");
/// Variant 3: the same algorithm without explicit loops. Reads the text from
/// `fv`, reflows it so that a line break follows every ". ", splits it at those
/// line breaks and prints one sentence per line.
int main() {
    const std::regex lineBreak("\n");
    // Read the complete text file into one string
    std::string text(std::istreambuf_iterator<char>(fv), {});
    // Replace all '\n' by space and replace all ". " by ".\n"
    text = std::regex_replace(std::regex_replace(text, lineBreak, " "), dotSpace, ".\n");
    // Get sentences. At this point the text no longer contains any ". ", so it
    // has to be split at the line breaks that were just inserted. Sub-match
    // index -1 selects the parts between the matches.
    std::vector<std::string> sentences(std::sregex_token_iterator(text.begin(), text.end(), lineBreak, -1), {});
    // Show output, one sentence per line (the split removed the line breaks)
    std::copy(sentences.begin(), sentences.end(), std::ostream_iterator<std::string>(std::cout, "\n"));
    return 0;
}
#endif