My cut at the same. I chose to go with divide and conquer. It is not fast. It is not efficient. But it is simple.
Unfortunately, it didn't work in this case because we are preserving the delimiters in the output: splitting on one delimiter at a time allowed later delimiters to split delimiters that had already been found.
For example:
Source :=foo+bar  .   :=baz+quaax:=  C++
Delims [+][ ][:=][:]
Result [:][=][foo][+][bar][ ][ ][.][ ][ ][ ][:][=][baz][+][quaax][:][=][ ][ ][C][+][+]
Yuck.
I finally settled on an approach similar to Jafar's and added it to my support library, so I can try it out in a job I'm working on as a replacement for the divide-and-conquer approach, because it does look to be faster. I wouldn't have bothered posting this, but Jafar's is a bit overcomplicated for my tastes. I haven't done any profiling, so his may be faster.
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>
// Stream every element of a vector, each one wrapped in square brackets
// (e.g. {1, 2} prints as "[1][2]"). An empty vector prints nothing.
// Returns the stream so calls can be chained.
template<class TYPE>
std::ostream & operator<<(std::ostream & out,
                          const std::vector<TYPE> & in)
{
    typename std::vector<TYPE>::const_iterator item = in.begin();
    while (item != in.end())
    {
        out << "[";
        out << *item;
        out << "]";
        ++item;
    }
    return out;
}
// Find the earliest occurrence of any delimiter at or after `start`.
//
// Parameters:
//   start    - index in `source` at which the search begins
//   source   - text being searched
//   delims   - candidate delimiters; when several match at the same position,
//              the one listed first wins. Empty strings are ignored.
//   delfound - set to the index (into `delims`) of the winning delimiter;
//              left untouched when nothing is found
//
// Returns the position of the match, or std::string::npos if no delimiter
// occurs at or after `start`.
size_t multifind(size_t start,
                 const std::string & source,
                 const std::vector<std::string> &delims,
                 size_t & delfound)
{
    size_t lowest = std::string::npos;
    for (size_t i = 0; i < delims.size(); i++)
    {
        // An empty delimiter "matches" everywhere with zero length, so a
        // caller that advances by the delimiter's length would never make
        // progress. Treat it as never matching.
        if (delims[i].empty())
        {
            continue;
        }
        size_t pos = source.find(delims[i], start);
        if (pos == start)
        {
            // Nothing can beat a match right at the start; stop looking.
            lowest = pos;
            delfound = i;
            break;
        }
        else if (pos < lowest)
        {
            // Strict '<' keeps the earlier-listed delimiter on ties.
            lowest = pos;
            delfound = i;
        }
    }
    return lowest;
}
// Split `source` on any of `delims`, keeping each delimiter that was hit as
// a token of its own. Text between delimiters is emitted only when it is
// non-empty. Which delimiter wins at a given position is decided by
// multifind().
std::vector<std::string> splitString(const std::string &source,
                                     const std::vector<std::string> &delims)
{
    std::vector<std::string> pieces;
    size_t cursor = 0;  // first character not yet consumed
    size_t which;       // index of the delimiter multifind() reported

    for (size_t hit = multifind(cursor, source, delims, which);
         hit != std::string::npos;
         hit = multifind(cursor, source, delims, which))
    {
        // Plain text sitting in front of the delimiter, if any.
        if (hit > cursor)
        {
            pieces.push_back(source.substr(cursor, hit - cursor));
        }

        // The delimiter itself is part of the output.
        const std::string &delim = delims[which];
        pieces.push_back(delim);
        cursor = hit + delim.length();
    }

    // Whatever follows the last delimiter.
    if (source.length() > cursor)
    {
        pieces.push_back(source.substr(cursor, std::string::npos));
    }
    return pieces;
}
void test(const std::string &source,
const std::vector<std::string> &delims)
{
std::cout << "Source " << source << std::endl;
std::cout << "Delims " << delims << std::endl;
std::cout << "Result " << splitString(source, delims) << std::endl << std::endl;
}
// Run the splitter over one sample string with several delimiter orderings,
// then over a second sample that starts and ends with plain text.
int main()
{
    const std::string sample = ":=foo+bar . :=baz+quaax:= C++";
    const std::vector<std::vector<std::string>> orderings = {
        { " ", ":=", "+" },
        { ":=", "+", " " },
        { "+", " ", ":=" },
        { "+", " ", ":=", ":" },
        { ":", " ", ":=", "+" },
    };

    for (const std::vector<std::string> &delims : orderings)
    {
        test(sample, delims);
    }

    test("foo+bar . :=baz+quaax:= C++lalala", { "+", " ", ":=", ":" });
    return 0;
}