0

The purpose of this code is to retrieve the URLs of search results. The website I am working with doesn't load all of the results unless you scroll the entire page. However, it takes a few seconds after scrolling for all of the results to load, and as it is, the next line is executed right away and it only retrieves the first few links instead of the entire page.

I think all I need for this to work is just a pause for a few seconds.

The XPath in this example is for Google, which doesn't lazy-load; the site I'm actually using is behind a login, and it does lazy-load.

// Smooth-scroll to the bottom of the page; per the description above, the
// target site only loads the remaining results once the page has been scrolled.
window.scrollTo({
  top: document.body.scrollHeight,
  behavior: 'smooth',
});

///pause here

// Collected output: one URL (or Markdown link) per line on success, or an
// "[ERROR]" report on failure. Declared with `var` outside the try block so
// the statements after this block can read it, and so the snippet can be
// pasted into a console more than once without a redeclaration error.
var returnResults;

try {
  const maxLinks = 25;       // stop after this many collected links
  const returnData = "URL";  // "MD" emits Markdown links; anything else emits bare URLs
  const xPath = '//*[@class="r"]/a';

  const xpathResults = document.evaluate(xPath, document, null, XPathResult.ANY_TYPE, null);

  const nodeList = [];
  let hrefStr;  // href of the most recently collected link

  for (let oNode = xpathResults.iterateNext();
       oNode && nodeList.length < maxLinks;
       oNode = xpathResults.iterateNext()) {

    // Skip a link whose href repeats the one collected immediately before it.
    if (oNode.href === hrefStr) {
      continue;
    }

    hrefStr = oNode.href;

    if (returnData === "MD") {
      nodeList.push("[" + oNode.textContent + "](" + hrefStr + ")");
    } else {
      nodeList.push(hrefStr);
    }
  }

  returnResults = nodeList.join('\n');

} catch (pError) {
  // Build the report from the caught value itself. Standard Error objects have
  // no `errorNumber` property, so fall back to a placeholder when it is absent.
  const detail = (pError && pError.message) || String(pError);
  const errorNumber = (pError && pError.errorNumber !== undefined)
    ? pError.errorNumber
    : "n/a";

  returnResults = "[ERROR]"
    + "\n\nError Number: " + errorNumber + "\n"
    + detail;
}

/**
 * Copies `text` to the clipboard by way of a throwaway <textarea>: the element
 * is attached to the page, filled, selected, copied with
 * document.execCommand("copy"), and then detached again.
 *
 * @param {string} text - The text to place on the clipboard.
 */
function copyToClipboard(text) {
    const scratch = document.createElement("textarea");
    const host = document.body;

    host.appendChild(scratch);
    scratch.value = text;
    scratch.select();
    document.execCommand("copy");
    host.removeChild(scratch);
}
// Put the collected links (or the error report) on the clipboard. One call is
// enough: a second identical call would only overwrite the clipboard with the
// same text.
copyToClipboard(returnResults);
3ggg3
  • 13
  • 3
  • 1
    There is a sleep-style solution that lets you wait for a set time between actions in your code. https://stackoverflow.com/questions/951021/what-is-the-javascript-version-of-sleep – Yotam Dahan Dec 30 '19 at 15:13
  • 2
    Instead of sleeping for an arbitrary amount of time and then hoping for the content to have appeared, you should hook onto the code that lazy-loads the content. – Bergi Dec 30 '19 at 15:15
  • @YotamDahan Sorry, I'm a complete novice in Javascript could you help walk me through what goes in the console log parentheses? – 3ggg3 Dec 30 '19 at 15:29
  • Does this answer your question? [Sleep in JavaScript - delay between actions](https://stackoverflow.com/questions/758688/sleep-in-javascript-delay-between-actions) – Javier Larroulet Dec 30 '19 at 16:05

2 Answers

0

There is a sleep-style solution that lets you wait for a set time between actions, as I mentioned in my comment earlier.

Here is my solution:

    /**
     * Returns a promise that resolves (with no value) once `time` milliseconds
     * have elapsed, so a caller can chain `.then(...)` or `await` it to pause.
     *
     * @param {number} time - Delay in milliseconds, passed straight to setTimeout.
     * @returns {Promise<void>}
     */
    function sleep(time) {
      return new Promise(function (wake) {
        setTimeout(wake, time);
      });
    }
    // Wait half a second (to give lazy-loaded results time to appear), then
    // collect the result links and copy them to the clipboard.
    sleep(500).then(() => {
      // One URL (or Markdown link) per line on success, or an "[ERROR]" report.
      let returnResults;

      try {
        const maxLinks = 25;       // stop after this many collected links
        const returnData = "URL";  // "MD" emits Markdown links; anything else emits bare URLs
        const xPath = '//*[@class="r"]/a';

        const xpathResults = document.evaluate(xPath, document, null, XPathResult.ANY_TYPE, null);

        const nodeList = [];
        let hrefStr;  // href of the most recently collected link

        for (let oNode = xpathResults.iterateNext();
             oNode && nodeList.length < maxLinks;
             oNode = xpathResults.iterateNext()) {

          // Skip a link whose href repeats the one collected immediately before it.
          if (oNode.href === hrefStr) {
            continue;
          }

          hrefStr = oNode.href;

          if (returnData === "MD") {
            nodeList.push("[" + oNode.textContent + "](" + hrefStr + ")");
          } else {
            nodeList.push(hrefStr);
          }
        }

        returnResults = nodeList.join('\n');

      } catch (pError) {
        // Build the report from the caught value itself. Standard Error objects
        // have no `errorNumber` property, so fall back to a placeholder.
        const detail = (pError && pError.message) || String(pError);
        const errorNumber = (pError && pError.errorNumber !== undefined)
          ? pError.errorNumber
          : "n/a";

        returnResults = "[ERROR]"
          + "\n\nError Number: " + errorNumber + "\n"
          + detail;
      }

      // Copies `text` to the clipboard through a temporary <textarea>.
      function copyToClipboard(text) {
        const dummy = document.createElement("textarea");
        document.body.appendChild(dummy);
        dummy.value = text;
        dummy.select();
        document.execCommand("copy");
        document.body.removeChild(dummy);
      }

      copyToClipboard(returnResults);
    }).catch((err) => {
      // Surface anything thrown outside the try block (e.g. by the clipboard
      // helper) instead of leaving an unhandled rejection.
      console.error(err);
    });
Yotam Dahan
  • 561
  • 5
  • 21
  • Hmm, when I try entering this into the console, it says there's a missing parenthesis after the argument list. Is this about the then()? – 3ggg3 Dec 30 '19 at 15:55
0

There isn't a sleep() in the same way that there is in Java. If you don't understand why, you'll need to read about threading (or the lack thereof) in JavaScript. However, you can create a promise-returning sleep function and call it with await (but the calling code will need to be inside an async function):

/**
 * Promise-based pause: the returned promise resolves (with no value) after
 * `millis` milliseconds, so it can be awaited inside an async function.
 *
 * @param {number} millis - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(millis) {
  return new Promise((done) => {
    setTimeout(done, millis);
  });
}


/**
 * Demonstration of pausing inside a loop: for each of four iterations, waits
 * one second via sleep() and then logs the zero-based iteration number.
 */
async function doIt() {
    const max = 4;
    for (let count = 0; count < max; count += 1) {
        await sleep(1000);
        console.log('loop: %s', count);
    }
}

doIt();