0

I am trying to grab all substrings between, or after, the occurrences of a certain character, specifically in search query URLs (grabbing the options). For example, if I have:

std::string url = "https://www.google.com/search?q=i+need+help&rlz=1C1CHBF_enUS851US851&oq=i+need+help&aqs=chrome.0.69i59j0l3j69i60l2.4646j0j7&sourceid=chrome&ie=UTF-8"

I need to output the strings in between and after (for the last occurrence) the "&" character so the output would be:

rlz=1C1CHBF_enUS851US851 
oq=i+need+help
aqs=chrome.0.69i59j0l3j69i60l2.4646j0j7
sourceid=chrome 
ie=UTF-8

I understand how to do this with one string, but I am stuck trying to implement it into a loop. This has to be done with several urls of different lengths and number of options.

So far I can only grab one substring, between the first and second occurrences of the character, but I need to grab all of them in any given url.

// Extracts only the text between the first and second '&' of `url`.
// NOTE: std::string::find returns std::string::npos when nothing is found;
// this snippet does not check for that case.

// Index just past the first '&'.
int a = url.find("&") + 1;
// Index of the second '&', located by searching from just past the first one.
int b = url.find("&", url.find("&") + 1);
// Number of characters lying between the two '&' characters.
int c = (b - a);
// The substring between the first and the second '&'.
std::string option = url.substr(a, c);
Vlad from Moscow
  • 224,104
  • 15
  • 141
  • 268
  • Also the `q=i+need+help` before the first `&`? – mch Nov 28 '19 at 08:46
  • @mch Yes, but I think I could do that by finding the first "q=" and the first "&" pretty easily. I am more worried about the parts after – stonesparks Nov 28 '19 at 08:50
  • Does this answer your question? [Parse (split) a string in C++ using string delimiter (standard C++)](https://stackoverflow.com/questions/14265581/parse-split-a-string-in-c-using-string-delimiter-standard-c) – jdigital Nov 28 '19 at 08:52

4 Answers4

0

Starting at the `?`, repeatedly search for the next `&` after the previous one, and stop once no further `&` can be found; keep in mind that the first element follows the `?` rather than an `&`:

/// Splits the query part of a URL into its '&'-separated options.
///
/// The query part is everything after the first '?'. Each piece between two
/// consecutive '&' characters (and the piece after the last '&') becomes one
/// element of the result, in order of appearance.
///
/// @param url  The URL to inspect.
/// @return     The options found after the first '?'. Empty when `url`
///             contains no '?'. A '?' with nothing after it, or a trailing
///             '&', yields an empty string element.
std::vector<std::string> foo(const std::string& url)
{
    std::vector<std::string> result;

    const auto query_mark = url.find('?');
    if (query_mark == std::string::npos) return result;

    // Only look for separators *after* the '?'. Searching from the start of
    // the URL would pick up an '&' located before the '?', which makes the
    // unsigned length computation wrap around and produces bogus pieces.
    auto begin = query_mark + 1;
    for (;;)
    {
        const auto end = url.find('&', begin);
        if (end == std::string::npos)
        {
            // Last (or only) option: take everything up to the end.
            result.push_back(url.substr(begin));
            break;
        }
        result.push_back(url.substr(begin, end - begin));
        begin = end + 1;
    }

    return result;
}

works for your example: https://ideone.com/SiRZQB

mch
  • 8,070
  • 2
  • 24
  • 39
0

Tbh I genuinely think that you should use a proper URI parser for this job as there could be a lot of fringe cases. But here you go:

#include <iostream>
#include <string>

// Prints every piece of the sample URL that follows an '&', one per line.
int main()
{
  const std::string url = "https://www.google.com/search?q=i+need+help&rlz=1C1CHBF_enUS851US851&oq=i+need+help&aqs=chrome.0.69i59j0l3j69i60l2.4646j0j7&sourceid=chrome&ie=UTF-8";
  const char separator = '&';

  // `cursor` always rests on a separator; the piece to print starts right after it.
  for (std::string::size_type cursor = url.find(separator); cursor != std::string::npos; ) {
    const std::string::size_type following = url.find(separator, cursor + 1);
    // When no further separator exists, `following` is npos and the unsigned
    // length below becomes huge; substr then simply runs to the end of the string.
    std::cout << url.substr(cursor + 1, following - cursor - 1) << std::endl;
    cursor = following;
  }
}

Playground: http://cpp.sh/8pshy7

Frederick Zhang
  • 3,243
  • 4
  • 25
  • 45
0

You can try the following code, which uses regular expressions to parse the URL.

#include <regex>
#include <iostream>
#include <string>
using namespace std;

// Prints each key=value pair of the sample URL's query string, one per line.
int main(){
    // Text still to be scanned; it shrinks to the unmatched tail after every hit.
    string remaining = "https://www.google.com/search?q=i+need+help&rlz=1C1CHBF_enUS851US851&oq=i+need+help&aqs=chrome.0.69i59j0l3j69i60l2.4646j0j7&sourceid=chrome&ie=UTF-8";
    // A '?' or '&' followed by "<non-& chars>=<non-& chars>";
    // capture group 1 is the pair without the leading separator.
    regex pair_pattern("[\?&](([^&]+)=([^&]+))");

    smatch found;
    while (regex_search(remaining, found, pair_pattern)) {
        cout << found[1] <<endl;
        // Resume the search in whatever follows the current match.
        remaining = found.suffix();
    }
    return 0;
}
yabhishek
  • 391
  • 3
  • 14
0

You can use just an ordinary for loop, as in this example:

#include <iostream>
#include <string>
#include <vector>
#include <iterator>
#include <algorithm>

// Collects every piece of the sample URL that follows an '&' and prints them,
// one per line.
int main() 
{
    const std::string url = "https://www.google.com/search?q=i+need+help"
                            "&rlz=1C1CHBF_enUS851US851"
                            "&oq=i+need+help"
                            "&aqs=chrome.0.69i59j0l3j69i60l2.4646j0j7"
                            "&sourceid=chrome&ie=UTF-8";

    const char delimiter = '&';

    // Exactly one piece follows each delimiter, so the delimiter count is the
    // number of elements that will be stored.
    std::vector<std::string> options;
    options.reserve( static_cast<std::vector<std::string>::size_type>(
        std::count( url.begin(), url.end(), delimiter ) ) );

    // `at` is the index of the delimiter that precedes the next piece.
    for ( std::string::size_type at = url.find( delimiter ); at != std::string::npos; )
    {
        const std::string::size_type first = at + 1;
        at = url.find( delimiter, first );

        // The final piece runs to the end of the string.
        const std::string::size_type last = ( at == std::string::npos ) ? url.size() : at;

        options.push_back( url.substr( first, last - first ) );
    }

    for ( const std::string &option : options )
    {
        std::cout << option << '\n';
    }

}

The program output is

rlz=1C1CHBF_enUS851US851
oq=i+need+help
aqs=chrome.0.69i59j0l3j69i60l2.4646j0j7
sourceid=chrome
ie=UTF-8

Or you can write a separate function as for example

#include <iostream>
#include <string>
#include <vector>
#include <iterator>
#include <algorithm>

/// Returns the pieces of `url` that follow each occurrence of `c`.
///
/// The text before the first `c` is not part of the result. Every piece
/// extends up to the next `c`, or to the end of the string for the last one.
///
/// @param url  The string to split.
/// @param c    The delimiter character ('&' by default).
/// @return     One element per occurrence of `c`, in order; empty if `c`
///             does not occur in `url`.
std::vector<std::string> split_url( const std::string &url, char c = '&' )
{
    std::vector<std::string> pieces;
    // One piece is produced per delimiter, so reserve exactly that many slots.
    pieces.reserve( static_cast<std::vector<std::string>::size_type>(
        std::count( url.begin(), url.end(), c ) ) );

    std::string::size_type delim = url.find( c );
    while ( delim != std::string::npos )
    {
        const std::string::size_type first = delim + 1;
        delim = url.find( c, first );

        // No further delimiter: the piece runs to the end of the string.
        const std::string::size_type last = ( delim == std::string::npos ) ? url.size() : delim;

        pieces.push_back( url.substr( first, last - first ) );
    }

    return pieces;
}

// Splits the sample URL with split_url (default '&' delimiter) and prints each
// resulting piece on its own line.
int main() 
{
    const std::string url = "https://www.google.com/search?q=i+need+help"
                            "&rlz=1C1CHBF_enUS851US851"
                            "&oq=i+need+help"
                            "&aqs=chrome.0.69i59j0l3j69i60l2.4646j0j7"
                            "&sourceid=chrome&ie=UTF-8";

    const std::vector<std::string> options = split_url( url );

    for ( const std::string &option : options )
    {
        std::cout << option << '\n';
    }

}
Vlad from Moscow
  • 224,104
  • 15
  • 141
  • 268