0

I'm currently trying to scrape a website in order to retrieve some information from it after a successful login. The website uses a token, so I'm doing the scrape in two steps: first I load the login page to read the token, then I post the login form.

The issue is that when I post the data to the login page, all I get is a 500 error instead of my dashboard.

What am I doing wrong?

Here is the code (without the account):

<?php

set_time_limit(300);
// Defined before simple_html_dom.php is loaded; presumably this raises the
// library's maximum parsable document size — confirm against the bundled version.
define('MAX_FILE_SIZE', 1200000000);

require_once 'simple_html_dom.php';

// Phase 1: load the login page, which carries the token.
$phase1 = getDom("https://crowdestor.com/en/account");

// 'content' is whatever str_get_html() produced. On an empty or failed
// response that is not an object, so test for an object rather than
// comparing against "" (which would let `false` through to ->find()).
if (!isset($phase1['content']) || !is_object($phase1['content'])) {
    exit("error 1");
}

// Get the token value. Passing index 0 asks find() for the first match only,
// which is null when the input is absent.
$tokenInput = $phase1['content']->find('input[name=crowd_token]', 0);
$token = $tokenInput ? $tokenInput->value : null;

// Reject a missing input as well as a missing/empty value attribute.
if (!is_string($token) || $token === '') {
    exit("error 2");
}

// Phase 2: try to login by posting the form fields together with the token.
$phase2 = getDom("https://crowdestor.com/en/account", [
    'post' => [
        'crowd_token' => $token,
        'login_identity' => "email",
        'login_password' => 'password',
        'login_account' => "1",
        'submit' => "Login",
    ]
]);

// Nothing parsable came back (transport error or empty body).
if (!isset($phase2['content']) || !is_object($phase2['content'])) {
    exit("error 3");
}

// Show the HTML result: it's where I get a 500.
echo($phase2['content']);




/**
 * Fetch a URL with cURL and parse the response body with simple_html_dom.
 *
 * Cookies are persisted in 'cookie.txt' (relative to the current working
 * directory) so that consecutive calls share one session.
 *
 * Recognised keys in $custom (all optional):
 *  - 'post'       array        Form fields, sent URL-encoded in a POST request.
 *  - 'headers'    array        Extra request header lines.
 *  - 'cookies'    string|array Cookie header value; an array is joined with "; ".
 *  - 'user_agent' string       User-Agent to send instead of the default one.
 *  - 'userpass'   string       Credentials passed to CURLOPT_USERPWD.
 *  - 'verify_ssl' bool         Set to false to disable TLS certificate checks
 *                              (defaults to true).
 *
 * @param string $url    Address to request; also sent as the Referer.
 * @param array  $custom Per-request overrides, see above.
 * @return array The curl_getinfo() data, extended with:
 *               'errno'   int    cURL error number (0 on success),
 *               'errmsg'  string cURL error message,
 *               'headers' string raw response headers (all redirect hops),
 *               'content' mixed  result of str_get_html() on the response body,
 *                                or false when the body is empty,
 *               'cookies' string "name=value" pairs from Set-Cookie, joined with "; ".
 */
function getDom($url, $custom = [])
{
    $userAgents = [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.7',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11) AppleWebKit/601.1.56 (KHTML, like Gecko) Version/9.0 Safari/601.1.56',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13',
        'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
        'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)',
    ];

    // Pick the random User-Agent once per process. Choosing a new one on every
    // call makes the requests of a single cookie session look like different
    // browsers, which a server may reject. (A later process can still pick a
    // different agent while reusing the same cookie.txt.)
    static $sessionUserAgent = null;
    if ($sessionUserAgent === null) {
        $sessionUserAgent = $userAgents[array_rand($userAgents)];
    }

    // Certificate verification stays on unless the caller explicitly opts out.
    $verifySsl = !array_key_exists('verify_ssl', $custom) || (bool) $custom['verify_ssl'];

    $options = [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HEADER => true, // response headers are prepended to the returned string
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_ENCODING => "",
        CURLOPT_REFERER => $url,
        CURLOPT_AUTOREFERER => true,
        CURLOPT_CONNECTTIMEOUT => 120,
        CURLOPT_TIMEOUT => 120,
        CURLOPT_MAXREDIRS => 10,
        CURLINFO_HEADER_OUT => true,
        CURLOPT_SSL_VERIFYPEER => $verifySsl,
        CURLOPT_SSL_VERIFYHOST => $verifySsl ? 2 : 0,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_VERBOSE => true,
        CURLOPT_COOKIEJAR => 'cookie.txt',
        CURLOPT_COOKIEFILE => 'cookie.txt',
        CURLOPT_USERAGENT => (array_key_exists('user_agent', $custom) ? $custom['user_agent'] : $sessionUserAgent),
    ];

    // Explicit cookies: accept the string this function returns under
    // 'cookies' as well as a list of "name=value" pairs.
    if (array_key_exists('cookies', $custom)) {
        $cookies = $custom['cookies'];
        if (is_array($cookies)) {
            $cookies = implode('; ', $cookies);
        }
        if (is_string($cookies) && $cookies !== '') {
            $options[CURLOPT_COOKIE] = $cookies;
        }
    }

    // Headers
    if (array_key_exists('headers', $custom) && is_array($custom['headers'])) {
        $options[CURLOPT_HTTPHEADER] = $custom['headers'];
    }

    // Post data: the PHP array is URL-encoded (application/x-www-form-urlencoded)
    if (array_key_exists('post', $custom) && is_array($custom['post'])) {
        $options[CURLOPT_POST] = true;
        $options[CURLOPT_POSTFIELDS] = http_build_query($custom['post']);
    }

    if (array_key_exists('userpass', $custom)) {
        $options[CURLOPT_USERPWD] = $custom['userpass'];
    }

    $ch = curl_init($url);
    curl_setopt_array($ch, $options);

    $response = curl_exec($ch);
    $err = curl_errno($ch);
    $errmsg = curl_error($ch);
    $header = curl_getinfo($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; normalise so the slicing below is safe.
    if (!is_string($response)) {
        $response = '';
    }

    // Split the raw response at the header/body boundary reported by cURL.
    // The (string) casts cover PHP versions where substr() returns false
    // for an out-of-range start.
    $headerSize = isset($header['header_size']) ? (int) $header['header_size'] : 0;
    $header_content = (string) substr($response, 0, $headerSize);
    $body_content = trim((string) substr($response, $headerSize));

    // Header names are case-insensitive, and a cookie without attributes has
    // no ';' terminator, so the match must stop at the end of the line.
    preg_match_all('/^Set-Cookie:[ \t]*(?<cookie>[^=;\r\n]+=[^;\r\n]*)/mi', $header_content, $matches);
    $cookiesOut = isset($matches['cookie']) ? implode("; ", $matches['cookie']) : '';

    // Parse only the body, so the DOM no longer contains the HTTP headers.
    $dom = ($body_content !== '') ? str_get_html($body_content) : false;

    $header['errno'] = $err;
    $header['errmsg'] = $errmsg;
    $header['headers'] = $header_content;
    $header['content'] = $dom;
    $header['cookies'] = $cookiesOut;

    return $header;
}
DK01
  • 1

0 Answers0