I have a relatively large file, and I needed to ensure that it contained only unique lines. The file is only 500MB. I understand that there is plenty of overhead, but I was seeing nearly 5GB of RAM usage. I could have done this with an external merge sort and kept RAM usage small, but this seemed faster to code.
I am using VC++14.
#include <string>
#include <vector>
#include <fstream>
#include <iostream>
#include <algorithm>
#include <unordered_set>
using std::vector;
using std::string;
using std::unordered_set;
// Collects the distinct non-empty lines of text files and writes them back out.
//
// Every distinct line is kept as its own std::string inside a
// std::unordered_set, so all unique lines are held in memory at the same time.
class uniqify {
    std::unordered_set<std::string> s;  // distinct non-empty lines read so far

public:
    // Returns true when `filename` could be opened for reading.
    auto exists(const std::string &filename) const -> bool {
        // The temporary stream is closed by its destructor.
        return std::ifstream(filename).good();
    }

    // Adds every non-empty line of `filename` to the set; lines already
    // present are ignored. If the file cannot be opened, the getline loop
    // never runs and the set is left unchanged.
    void read(const std::string &filename) {
        std::ifstream input(filename);
        std::string line;
        while (std::getline(input, line)) {
            if (!line.empty())
                s.insert(line);
        }
    }

    // Writes each stored line followed by '\n' to `filename`, in the set's
    // iteration order (which is unspecified for an unordered_set).
    void write(const std::string &filename) const {
        std::ofstream fout(filename);
        // Bind by const reference: `auto line` would copy every string.
        for (const auto &line : s)
            fout << line << '\n';
        // fout is flushed and closed by its destructor.
    }
};
int main(int argc, char **argv) {
uniqify u;
string file("file.txt");
if(u.exists(file))
u.read(file);
u.write("output_file.txt");
return 0;
}
What causes the RAM usage to skyrocket to roughly 10x the size of the file?