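I'm currently trying to scrape a website in order to retrieve some information that is only available after a successful login. The login form is protected by a token, so I'm doing the scrape in two steps: first I load the login page to read the token, then I post the credentials together with that token.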
The issue is that when I post the data to the login page, all I get back is a 500 error instead of my dashboard.
What am I doing wrong?
Here is the code (without the account):
<?php
set_time_limit(300);
define('MAX_FILE_SIZE', 1200000000);
require_once 'simple_html_dom.php';

// Two-step login: fetch the login page to read the anti-forgery token, then post
// the credentials together with that token.

$loginUrl = "https://crowdestor.com/en/account";

// getDom() picks a random User-Agent on every call unless one is supplied. Both
// requests share the same cookie session, so they are pinned to one User-Agent
// to make them look like the same browser.
$userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36';

// load the login page with the token
$phase1 = getDom($loginUrl, [
    'user_agent' => $userAgent,
]);

// empty() also covers the `false` that str_get_html() returns when it cannot
// parse the response, which a strict comparison against "" would let through.
if (!empty($phase1['errno']) || empty($phase1['content'])) {
    exit("error 1");
}

// get the token value (index 0 = first matching element, null when there is none)
$tokenInput = $phase1['content']->find('input[name=crowd_token]', 0);
$token = $tokenInput ? $tokenInput->value : null;

if ($token === null || $token === false || $token === '') {
    exit("error 2");
}

// try to login
$phase2 = getDom($loginUrl, [
    'user_agent' => $userAgent,
    'post' => [
        'crowd_token' => $token,
        'login_identity' => "email",
        'login_password' => 'password',
        'login_account' => "1",
        'submit' => "Login",
    ],
]);

if (!empty($phase2['errno'])) {
    // Transport-level failure: there is no HTML to show.
    exit("error 3");
}

if (isset($phase2['http_code']) && $phase2['http_code'] >= 400) {
    // Record the status so a server-side failure is not mistaken for a parsing problem.
    error_log('Login request returned HTTP ' . $phase2['http_code']);
}

// show the HTML result
echo($phase2['content']);
/**
 * Perform an HTTP request with cURL and return the transfer information together
 * with the response headers, the parsed response body and the cookies it set.
 *
 * Recognised keys of $custom (all optional):
 *  - 'cookies'    string ("a=1; b=2") or array of "name=value" strings sent as the Cookie header
 *  - 'user_agent' string used as User-Agent; a random entry of the built-in list otherwise
 *  - 'headers'    array of raw request header lines
 *  - 'post'       array of form fields; switches the request to POST
 *  - 'userpass'   "user:password" string for HTTP authentication
 *  - 'verify_ssl' truthy to enable TLS certificate/host verification (off by default)
 *
 * The returned array is the result of curl_getinfo() extended with:
 *  - 'errno'   cURL error number (0 on success)
 *  - 'errmsg'  cURL error message
 *  - 'headers' raw response header text (of every response when redirects were followed)
 *  - 'content' simple_html_dom object built from the response body only, or null when
 *              the request failed or the body could not be parsed
 *  - 'cookies' "name=value; name=value" string built from the Set-Cookie response headers
 *
 * @param string $url
 * @param array $custom
 * @return array
 */
function getDom($url, $custom = [])
{
    $userAgents = [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.7',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11) AppleWebKit/601.1.56 (KHTML, like Gecko) Version/9.0 Safari/601.1.56',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13',
        'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
        'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)',
    ];

    // NOTE(review): TLS verification is off unless the caller opts in, which keeps the
    // historical behaviour but leaves credentials exposed to man-in-the-middle attacks.
    // Pass 'verify_ssl' => true wherever a CA bundle is available.
    $verifySsl = !empty($custom['verify_ssl']);

    $options = [
        CURLOPT_RETURNTRANSFER => true,
        // Response headers are included in the output so Set-Cookie lines can be read below.
        CURLOPT_HEADER => true,
        CURLOPT_FOLLOWLOCATION => true,
        // Empty string = advertise every encoding this cURL build supports.
        CURLOPT_ENCODING => "",
        CURLOPT_REFERER => $url,
        CURLOPT_AUTOREFERER => true,
        CURLOPT_CONNECTTIMEOUT => 120,
        CURLOPT_TIMEOUT => 120,
        CURLOPT_MAXREDIRS => 10,
        // Makes the outgoing request headers available in the curl_getinfo() result.
        CURLINFO_HEADER_OUT => true,
        CURLOPT_SSL_VERIFYPEER => $verifySsl,
        CURLOPT_SSL_VERIFYHOST => $verifySsl ? 2 : 0,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_VERBOSE => true,
        // Same file for reading and writing, so cookies persist between calls.
        CURLOPT_COOKIEJAR => 'cookie.txt',
        CURLOPT_COOKIEFILE => 'cookie.txt',
        CURLOPT_USERAGENT => (array_key_exists('user_agent', $custom) ? $custom['user_agent'] : $userAgents[array_rand($userAgents)]),
    ];

    // Extra cookies: accept either a ready-made header string (the format this function
    // returns under 'cookies') or a list of "name=value" pairs. Indexing a string with
    // [0] would only send its first character.
    if (array_key_exists('cookies', $custom)) {
        $cookieHeader = is_array($custom['cookies'])
            ? implode('; ', $custom['cookies'])
            : (string) $custom['cookies'];
        if ($cookieHeader !== '') {
            $options[CURLOPT_COOKIE] = $cookieHeader;
        }
    }

    // Headers
    if (array_key_exists('headers', $custom) and is_array($custom['headers'])) {
        $options[CURLOPT_HTTPHEADER] = $custom['headers'];
    }

    // Post data (given as a PHP array, sent as a URL-encoded form body)
    if (array_key_exists('post', $custom) and is_array($custom['post'])) {
        $options[CURLOPT_POST] = true;
        $options[CURLOPT_POSTFIELDS] = http_build_query($custom['post']);
    }

    if (array_key_exists('userpass', $custom)) {
        $options[CURLOPT_USERPWD] = $custom['userpass'];
    }

    $ch = curl_init($url);
    curl_setopt_array($ch, $options);
    $response = curl_exec($ch);
    $err = curl_errno($ch);
    $errmsg = curl_error($ch);
    $header = curl_getinfo($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; treat that as an empty response.
    $rawResponse = is_string($response) ? $response : '';

    // Split on the raw string, before any HTML parsing: header_size is the byte length
    // of the header section that precedes the body in the output.
    $headerSize = isset($header['header_size']) ? (int) $header['header_size'] : 0;
    $headerContent = (string) substr($rawResponse, 0, $headerSize);
    $bodyContent = trim((string) substr($rawResponse, $headerSize));

    // Parse the body only, so the DOM does not contain HTTP header text.
    $dom = null;
    if ($bodyContent !== '') {
        $parsed = str_get_html($bodyContent);
        if ($parsed !== false) {
            $dom = $parsed;
        }
    }

    // Header names are case-insensitive, and a cookie without attributes has no ';',
    // so the value must also stop at the end of the line.
    preg_match_all('#^Set-Cookie:[ \t]*(?<cookie>[^=;\r\n]+=[^;\r\n]*)#mi', $headerContent, $matches);
    $cookiesOut = implode("; ", $matches['cookie']);

    $header['errno'] = $err;
    $header['errmsg'] = $errmsg;
    $header['headers'] = $headerContent;
    $header['content'] = $dom;
    $header['cookies'] = $cookiesOut;

    return $header;
}