I want to capture the HAR file of a page that requires authentication. I am using selenium webdriver and browsermob-proxy.

I first open the login page in the browser to collect the browser cookies, then use the requests module to log in and save the authentication cookies, and then try to pass those cookies back into the browser so it can load the page I actually want.

My code is a combination of "Python - Requests, Selenium - passing cookies while logging in" and "How Do I Reset The Har File Used By Browser-Mob-Proxy Module For Python?".
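
For context, creds is just a dict of test accounts defined elsewhere in the script, mapping usernames to passwords; the values below are made up:

creds = {
    "alice": "not-the-real-password",
    "bob": "also-made-up",
}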

import random
import requests
from time import sleep

from browsermobproxy import Server
from selenium import webdriver
#from selenium.webdriver.firefox.options import Options

def fetch_resources(url):
    server = Server("./Resources/browsermob-proxy-2.1.4/bin/browsermob-proxy")
    server.start()
    sleep(1)
    # WARNING: trustAllServers disables upstream certificate checks and is a security risk,
    # but it is needed here so browsermob-proxy can proxy HTTPS traffic
    # gotcha: the browsermob-proxy certificate also needs to be installed in the browser
    proxy = server.create_proxy(params={"trustAllServers": "true"})
    sleep(1)

    #options = Options()
    #options.headless = True

    # configure the browser proxy in Firefox
    profile = webdriver.FirefoxProfile()
    # issue: https://stackoverflow.com/a/58744858
    profile.set_proxy(proxy.selenium_proxy())
    browser = webdriver.Firefox(
        #options=options,
        firefox_profile=profile,
        executable_path="./Resources/geckodriver",
        proxy=proxy.selenium_proxy(),
    )

    user = random.choice(list(creds.keys()))
    proxy.new_har(ref="Auth", options={"captureHeaders": True, "captureContent": True})
    browser.get("http://127.0.0.1:4000/login")
    response_har = proxy.har

    #storing the cookies generated by the browser
    request_cookies_browser = browser.get_cookies()

    #making a persistent connection using the requests library
    s = requests.Session()
    data = {"name":user, "password":creds[user]}
    r = s.post("http://127.0.0.1:4000/login", data=data)

    #passing the cookie of the response to the browser
    dict_resp_cookies = s.cookies.get_dict()
    response_cookies_browser = [{'name':name, 'value':value} for name, value in dict_resp_cookies.items()]
    print("coookies", response_cookies_browser)

    for cookie in response_cookies_browser:
        browser.add_cookie(cookie)

    proxy.new_har(ref="Content", options={"captureHeaders": True, "captureContent": True})
    browser.get(url)
    print(proxy.har)

    # wait until the entire page loads
    print(url)
    sleep(5)

    # proxy.har returns the network log (HAR) as a dict; pull the resource URLs out of it
    result = extract_resource_urls(proxy.har)
    server.stop()
    browser.quit()

    return result
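
extract_resource_urls is just a helper that walks the HAR entries and collects the request URLs; a trimmed-down version of it looks roughly like this:

def extract_resource_urls(har):
    # HAR 1.2 layout: log -> entries -> each entry has a request with a url
    return [entry["request"]["url"] for entry in har["log"]["entries"]]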

The first HAR, for /login, is populated as expected, and the browser is logged in when it loads the second page. But the second HAR comes back empty:

{'log': {'version': '1.2', 'creator': {'name': 'BrowserMob Proxy', 'version': '2.1.4', 'comment': ''}, 'pages': [{'id': 'Content', 'startedDateTime': '2020-04-28T10:19:08.885-04:00', 'title': 'CTFd', 'pageTimings': {'comment': ''}, 'comment': ''}], 'entries': [], 'comment': ''}}
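
So the entries list for the second capture is empty. A quick check like the one below (run right after each browser.get call, against the same proxy object) shows entries for the "Auth" capture but nothing for "Content":

print(len(proxy.har["log"]["entries"]))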

Is this because I am on localhost? Am I missing a step in handling cookies or the HAR files?
