2

I have a WebBrowser control is Windows Forms project. It navigates through all the urls available in “MyTableTest.html”. There are four urls in this page and the webbrowser goes through each one – one by one. Once it reaches the last one it should go to the first again. It works fine in the first iteration – but not going to the urls in the second iteration. This is an intermittent issue - certain times it works.

It seems (from the log) that he awaited task is not completed. What can be done to make it working in the second iteration also?

Note: MyTableTest.html is given below

Note: This is based on the post Get ReadyState from WebBrowser control without DoEvents

Issue

       startNavigation();

       WriteLogFunction("Location 1");

       // wait for DOM onload event, throw if cancelled
       await onloadTcs.Task;

       //ISSUE: Not reaching this location at second time navigation
       WriteLogFunction("Location 2");

Code

public partial class Form1 : Form
{
    public Form1()
    {
        InitializeComponent();
        this.Load += MainForm_Load;
    }

    List<string> visitedProducts = new List<string>();
    string nextNavigationUrl = String.Empty;

    // Form Load event handler
    async void MainForm_Load(object sender, EventArgs e)
    {
        // cancel the whole operation in 20 sec
        var cts = new CancellationTokenSource(20000);

        //urlStore.Add(@"C:\Samples_L\MyTableTest.html");
        nextNavigationUrl = GetHomoePageUrl();
        await NavigateInLoopAsync(cts.Token);
    }

    // navigate to each URL in a loop
    async Task NavigateInLoopAsync(CancellationToken ct)
    {

        bool isIterationComplete = false;

        while (!isIterationComplete)
        {

            string url = String.Empty;

            if (String.IsNullOrEmpty(nextNavigationUrl))
            {
                WriteLogFunction("Close");
                isIterationComplete = true;
            }
            else
            {
                url = nextNavigationUrl;
                ct.ThrowIfCancellationRequested();


                WriteLogFunction("Calling NavigateAsync");

                Action startNavigation = () => this.webBrowser1.Navigate(url);
                var html = await NavigateAsync(ct, startNavigation);


            }
        }
    }

    // asynchronous navigation
    async Task<string> NavigateAsync(CancellationToken ct, Action startNavigation)
    {
        var onloadTcs = new TaskCompletionSource<bool>();
        EventHandler onloadEventHandler = null;

        WriteLogFunction("Inside Function NavigateAsync");

        WebBrowserDocumentCompletedEventHandler documentCompletedHandler = delegate
        {
            // DocumentCompleted may be called several time for the same page,
            // if the page has frames
            if (onloadEventHandler != null)
                return;

            // so, observe DOM onload event to make sure the document is fully loaded
            onloadEventHandler = (s, e) =>
                onloadTcs.TrySetResult(true);
            this.webBrowser1.Document.Window.AttachEventHandler("onload", onloadEventHandler);
        };

        this.webBrowser1.DocumentCompleted += documentCompletedHandler;

        try
        {
            using (ct.Register(() => onloadTcs.TrySetCanceled(), useSynchronizationContext: true))
            {
                startNavigation();

                WriteLogFunction("Location 1");

                // wait for DOM onload event, throw if cancelled
                await onloadTcs.Task;

                //ISSUE: Not reaching this location at second time navigation
                WriteLogFunction("Location 2");
            }
        }
        finally
        {
            this.webBrowser1.DocumentCompleted -= documentCompletedHandler;
            if (onloadEventHandler != null)
                this.webBrowser1.Document.Window.DetachEventHandler("onload", onloadEventHandler);
        }

        WriteLogFunction("Place 3");

        // the page has fully loaded by now

        // optional: let the page run its dynamic AJAX code,
        // we might add another timeout for this loop
        do { await Task.Delay(500, ct); }
        while (this.webBrowser1.IsBusy);

        //Call Processing -- Added By Lijo
        ExerciseApp(this.webBrowser1, null);

        // return the page's HTML content
        return this.webBrowser1.Document.GetElementsByTagName("html")[0].OuterHtml;
    }

    private void ExerciseApp(object sender, WebBrowserDocumentCompletedEventArgs e)
    {
        WriteLogFunction("ExerciseApp");
        var wb = sender as WebBrowser;
        int catalogElementIterationCounter = 0;
        var elementsToConsider = wb.Document.All;
        string productUrl = String.Empty;
        bool isClicked = false;

        foreach (HtmlElement e1 in elementsToConsider)
        {

            catalogElementIterationCounter++;

            string x = e1.TagName;
            String idStr = e1.GetAttribute("id");

            if (!String.IsNullOrWhiteSpace(idStr))
            {
                //Each Product Navigation
                if (idStr.Contains("catalogEntry_img"))
                {
                    productUrl = e1.GetAttribute("href");
                    if (!visitedProducts.Contains(productUrl))
                    {
                        WriteLogFunction("productUrl -- " + productUrl);
                        visitedProducts.Add(productUrl);
                        isClicked = true;

                        //e1.InvokeMember("Click");
                        nextNavigationUrl = productUrl;

                        break;
                    }

                }
            }
        }

        WriteLogFunction(visitedProducts.Count.ToString());
        WriteLogFunction(nextNavigationUrl);

        if (visitedProducts.Count == 4)
        {
            WriteLogFunction("Condition B");
            visitedProducts = new List<string>();
        }

        if (!isClicked)
        {
            WriteLogFunction("Condition C");
            nextNavigationUrl = GetHomoePageUrl();
        }
    }

    private void HomoePageNavigate()
    {
        webBrowser1.Navigate(GetHomoePageUrl());
    }

    private string GetHomoePageUrl()
    {
       return @"C:\Samples_L\MyTableTest.html";
    }

    private void WriteLogFunction(string strMessage)
    {
        using (StreamWriter w = File.AppendText("log.txt"))
        {
            w.WriteLine("\r\n{0} ..... {1} ", DateTime.Now.ToLongTimeString(), strMessage);
        }
    }



}

MyTableTest.html

<html>
<head>

    <style type="text/css">
        table {
            border: 2px solid blue;
        }

        td {
            border: 1px solid teal;
        }
    </style>

</head>
<body>

    <table id="four-grid">
         <tr>
            <td>
                <a href="https://www.wikipedia.org/" id="catalogEntry_img63666">

                    <img src="ssss"
                        alt="B" width="70" />
                </a>
            </td>
            <td>
                <a href="http://www.keralatourism.org/" id="catalogEntry_img63667">

                    <img src="ssss"
                        alt="A" width="70" />
                </a>
            </td>
        </tr>
        <tr>
            <td>
                <a href="https://stackoverflow.com/users/696627/lijo" id="catalogEntry_img63664">

                    <img src="ssss"
                        alt="G" width="70" />
                </a>
            </td>
            <td>
                <a href="http://msdn.microsoft.com/en-US/#fbid=zgGLygxrE84" id="catalogEntry_img63665">

                    <img src="ssss"
                        alt="Y" width="70" />
                </a>
            </td>
        </tr>

    </table>
</body>

</html>
Community
  • 1
  • 1
LCJ
  • 20,854
  • 59
  • 228
  • 387
  • 1
    Perhaps, `DocumentCompleted` or `window.onload` doesn't get fired for that particular link. Do you see the log entries for `"Location 1"`, `"Location 2"`, `"Place 3"`? You may also want to put one more entry before `ExerciseApp`. – noseratio Apr 03 '14 at 00:07
  • @Noseratio "Location 1" is printed but not "Location 2". What is your suggestion to overcome this? Were you able to reproduce this issue? -- I have posted completed code. – LCJ Apr 03 '14 at 01:16
  • 1
    Check my answer, specifically the part referring to `SetFeatureBrowserEmulation`. – noseratio Apr 03 '14 at 01:52

1 Answers1

1

"Location 1" is printed but not "Location 2". What is your suggestion to overcome this? Were you able to reproduce this issue?

I haven't tried to repro, but like I said in the comment, it's obvious that either DocumentCompleted or window.onload is not getting fired for that particular URL. That's possible if the page cannot be fully loaded, or e.g. if the URL is invalid or the server becomes unreachable. That's what the time-out logic is for.

Put one more trace at the beginning of documentCompletedHandler, and one more inside onloadEventHandler:

onloadEventHandler = (s, e) => { Log("inside onloadEventHandler");
    onloadTcs.TrySetResult(true); }

See what gets fired and what doesn't.

Besides, your should also implement WebBrowser Feature Control to enable the modern IE features. This also often affects how web page are loaded. Copy SetFeatureBrowserEmulation from here.

Community
  • 1
  • 1
noseratio
  • 56,401
  • 21
  • 172
  • 421