
I need to automatically extract all the name properties from this JavaScript (separately for providers large and providers small)

/* Large-button providers: maps a provider id to its display name, an optional
   input label, and the OpenID endpoint URL ({username} is a placeholder;
   url is null when the user has to enter the full OpenID). */
var providers_large = {
    google : {
        name : 'Google',
        url : 'https://www.google.com/accounts/o8/id'
    },
    yahoo : {
        name : 'Yahoo',
        url : 'http://me.yahoo.com/'
    },
    aol : {
        name : 'AOL',
        label : 'Enter your AOL screenname.',
        url : 'http://openid.aol.com/{username}'
    },
    myopenid : {
        name : 'MyOpenID',
        label : 'Enter your MyOpenID username.',
        url : 'http://{username}.myopenid.com/'
    },
    openid : {
        name : 'OpenID',
        label : 'Enter your OpenID.',
        url : null
    }
};

/* Small-button providers: same shape as providers_large. Entries wrapped in
   block comments are disabled providers and are not part of the object. */
var providers_small = {
    livejournal : {
        name : 'LiveJournal',
        label : 'Enter your Livejournal username.',
        url : 'http://{username}.livejournal.com/'
    },
    /* flickr: {
        name: 'Flickr',
        label: 'Enter your Flickr username.',
        url: 'http://flickr.com/{username}/'
    }, */
    /* technorati: {
        name: 'Technorati',
        label: 'Enter your Technorati username.',
        url: 'http://technorati.com/people/technorati/{username}/'
    }, */
    wordpress : {
        name : 'Wordpress',
        label : 'Enter your Wordpress.com username.',
        url : 'http://{username}.wordpress.com/'
    },
    blogger : {
        name : 'Blogger',
        label : 'Your Blogger account',
        url : 'http://{username}.blogspot.com/'
    },
    verisign : {
        name : 'Verisign',
        label : 'Your Verisign username',
        url : 'http://{username}.pip.verisignlabs.com/'
    },
    /* vidoop: {
        name: 'Vidoop',
        label: 'Your Vidoop username',
        url: 'http://{username}.myvidoop.com/'
    }, */
    /* launchpad: {
        name: 'Launchpad',
        label: 'Your Launchpad username',
        url: 'https://launchpad.net/~{username}'
    }, */
    claimid : {
        name : 'ClaimID',
        label : 'Your ClaimID username',
        url : 'http://claimid.com/{username}'
    },
    clickpass : {
        name : 'ClickPass',
        label : 'Enter your ClickPass username',
        url : 'http://clickpass.com/public/{username}'
    },
    google_profile : {
        name : 'Google Profile',
        label : 'Enter your Google Profile username',
        url : 'http://www.google.com/profiles/{username}'
    }
};

/* Locale-specific settings assigned onto the openid object. */
openid.locale = 'en';
openid.sprite = 'en'; // reused in the German & Japanese localizations
openid.demo_text = 'In client demo mode. Normally would have submitted OpenID:';
openid.signin_text = 'Sign-In';
openid.image_title = 'log in with {provider}';

So I need to: A) Remove all the C-Style comments and B) Get all the name values for [providers_large, providers_small] (after the comments have been removed)

So far I have tried regex to remove C-Style comments (and failed) and regex to get everything between curly braces (and failed)

I subsequently tried to read it in as JSON, but this of course failed with an "Invalid JSON primitive" error.

These are the Stack Overflow pages I used, and these are the examples I have tried so far:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace ConsoleExperiments

    public class Program

        // http://stackoverflow.com/questions/2538279/strip-out-c-style-multi-line-comments
        // NOT working
        // Attempts to strip /* ... */ block comments from strInput and returns the resulting text.
        // NOTE(review): in .NET, RegexOptions.Multiline only changes how ^ and $ anchor; '.' still
        // does not match '\n' unless RegexOptions.Singleline is set. That would explain why comments
        // spanning several lines are left in place by the pattern used below.
        static string RemoveCstyleComments(string strInput)
            // First attempt; it is overwritten below before it is ever used.
            string strPattern = @"/[*][\w\d\s]+[*]/";
            //strPattern = @"/\*.*?\*/";
            // Effective pattern: "/*", the shortest run of '.', then "*/".
            strPattern = "/\\*.*?\\*/";

            // Every match is replaced by the empty string.
            string strOutput = System.Text.RegularExpressions.Regex.Replace(strInput, strPattern, string.Empty, System.Text.RegularExpressions.RegexOptions.Multiline);
            return strOutput;

        // http://stackoverflow.com/questions/413071/regex-to-get-string-between-curly-braces-i-want-whats-between-the-curly-brace
        // http://stackoverflow.com/questions/5337166/regular-expression-get-string-between-curly-braces
        // http://stackoverflow.com/questions/1904617/regex-for-removing-curly-brackets-with-nested-curly-brackets
        // http://stackoverflow.com/questions/378415/how-do-i-extract-a-string-of-text-that-lies-between-two-brackets-using-net
        // Experiment: runs one regex against strInput and prints the groups and captures of the
        // first match to the console. The return value is always the empty string.
        static string GetCurlyValues(string strInput)
            // Each assignment below overwrites the previous one, so only the LAST pattern is used.
            // NOTE(review): the first two patterns carry '/' delimiters at both ends; a .NET pattern
            // string presumably treats those as literal slashes - confirm.
            string strPattern = "/{(.*?)}/";
            strPattern = "/{([^}]*)}/";
            strPattern = @"\{(\s*?.*?)*?\}";
            strPattern = @"(?<=\{).*(?=\})";
            strPattern = "{(.*{(.*)}.*)}";
            strPattern = "{{([^}]*)}}";
            strPattern = "{{({?}?[^{}])*}}";
            // Effective pattern: it matches text between round parentheses, not curly braces.
            strPattern = @"\(([^)]*)\)";

            System.Text.RegularExpressions.Regex rex = new System.Text.RegularExpressions.Regex(strPattern, System.Text.RegularExpressions.RegexOptions.Multiline);

            // Match (not Matches): only the first occurrence in strInput is examined.
            System.Text.RegularExpressions.Match mMatch = rex.Match(strInput);

            foreach (System.Text.RegularExpressions.Group g in mMatch.Groups)
                Console.WriteLine("Group: " + g.Value);
                foreach (System.Text.RegularExpressions.Capture c in g.Captures)
                    Console.WriteLine("Capture: " + c.Value);

            // Nothing is collected; results are only written to the console.
            return "";

        // Reads "TestFile.txt" line by line if it exists; otherwise reports that the file is missing.
        // NOTE(review): this listing appears truncated - the "try" keyword, the "else" branch keyword
        // and the body of the while loop are not visible, only their closing comments.
        static void ReadFile()
                // Relative path, resolved against the current working directory.
                string strFilePath = @"TestFile.txt";
                if (System.IO.File.Exists(strFilePath))
                    // Create an instance of StreamReader to read from a file.
                    // The using statement also closes the StreamReader.
                    using (System.IO.StreamReader sr = new System.IO.StreamReader(strFilePath))
                        string line;
                        // Read and display lines from the file until the end of
                        // the file is reached.
                        // ReadLine returns null at end of file, which ends the loop.
                        while ((line = sr.ReadLine()) != null)
                        } // Whend

                    } // End Using

                } // End if (System.IO.File.Exists(strFilePath))
                    // Reached when the file does not exist (see the truncation note at the top).
                    Console.WriteLine("File \"" + strFilePath + "\" does not exist.");
            } // End Try
            catch (Exception e)
                // Let the user know what went wrong.
                // Only the fixed headline is printed in the visible code; "e" itself is not used.
                Console.WriteLine("The file could not be read:");
            } // End Catch

        } // End Sub

        // Plain holder with the three string fields a provider entry in the script can carry
        // (name, label, url). The initial values are placeholder strings.
        public class cProvider
            public string name = "abc";
            public string label ="def";
            public string url ="url";

        // Container holding a list of cProvider items in its "foo" field; the list starts out empty.
        public class cProviders_large
            public List<cProvider> foo = new List<cProvider>();

        // Entry point of the experiment: loads the script file, then serializes an empty
        // cProviders_large instance to JSON.
        static void Main(string[] args)
            // The file content is read but not used by any active statement below
            // (the only consumer is the commented-out DeserializeObject call).
            string strContent = System.IO.File.ReadAllText(@"D:\UserName\Downloads\openid-selector-1.3\openid-selector\js\openid-en - Kopie.js.txt");
            System.Web.Script.Serialization.JavaScriptSerializer js = new System.Web.Script.Serialization.JavaScriptSerializer();
            //object obj = js.DeserializeObject(strContent);

            cProviders_large xx = new cProviders_large();
            // "ap" is created but never added to xx.foo in the visible code.
            cProvider ap = new cProvider();

            // "res" is not used afterwards in the visible code.
            string res = js.Serialize(xx);

            // NOTE(review): no key-read call is visible after this prompt in the listing.
            Console.WriteLine(" --- Press any key to continue --- ");
        } // End Sub Main

    } // End Class Program

} // End namespace ConsoleExperiments

Could anybody who understands regex better than I do provide me with the necessary regular expressions? Right now, it looks like I will end up doing it by hand every time the file changes, and I really, really hate that...

Edit: On a sidenote, the v8 wrapper uses C++.NET, and thus doesn't work on Linux, although the v8 engine does work very well on Linux.

So I'm sticking to solving the problem via JSON conversion.

  • 1
    I believe the problem is equivalent to this one: http://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags/1732454#1732454 JavaScript is not a regular language. Removing the comments should be possible but will it be useful if you can't do the rest? – Stilgar Nov 30 '11 at 09:25
  • @Stilgar: Actually, doing the rest is less complicated than properly removing the comments. I am already 90 to 95 percent through with the rest. – Stefan Steiger Nov 30 '11 at 19:10

You could use a javascript engine:

using System;
using System.IO;
using Noesis.Javascript;

// Evaluates the script with the Noesis.Javascript engine and reads providers_large back from it.
// NOTE(review): this listing appears truncated - no call that loads or runs the script is
// visible, and only the format-string argument of the output call remains inside the loop.
class Program
    static void Main()
        var context = new JavascriptContext();
        // Registers an "openid" object in the context; presumably so that the script's
        // openid.* assignments have a target - confirm.
        context.SetParameter("openid", new object());
        // dynamic: the entries are accessed without compile-time type checking.
        dynamic providers_large = context.GetParameter("providers_large");
        foreach (var provider in providers_large)
                "name: {0}, url: {1}", 

prints the following on my console:

name: Google, url: https://www.google.com/accounts/o8/id
name: Yahoo, url: http://me.yahoo.com/
name: AOL, url: http://openid.aol.com/{username}
name: MyOpenID, url: http://{username}.myopenid.com/
name: OpenID, url:
Consider the JavaScriptSerializer for this; it provides JSON deserialization. If you remove the `var` declarations and the comments, it should be able to create an object graph.

Darin Dimitrov's answer is certainly the most simple.
However, Noesis.Javascript is most annoyingly written in C++.NET, which means it cannot be compiled on Linux, although both C#/.NET (via mono) and the v8 engine run excellently on Linux.

So here is the workaround via conversion to JSON and deserialization:

/// <summary>
/// Removes every C-style block comment (<c>/* ... */</c>) from <paramref name="strInput"/>.
/// </summary>
/// <param name="strInput">Text that may contain block comments, possibly spanning several lines.</param>
/// <returns>The input with all block comments removed; text outside the comments is unchanged.</returns>
/// <remarks>
/// Line comments (<c>//</c>) are not removed, and a <c>/*</c> inside a string literal is
/// treated like any other comment opener.
/// </remarks>
static string RemoveCstyleComments(string strInput)
{
    // http://stackoverflow.com/questions/462843/improving-fixing-a-regex-for-c-style-block-comments
    // [^*] also matches line breaks, so multi-line comments are covered without
    // RegexOptions.Singleline. The atomic groups (?>...) stop the engine from backtracking
    // into text it has already consumed.
    const string strPattern = @"/\*(?>(?:(?>[^*]+)|\*(?!/))*)\*/";

    // The pattern has no ^ or $ anchors, so RegexOptions.Multiline would change nothing.
    return System.Text.RegularExpressions.Regex.Replace(strInput, strPattern, string.Empty);
} // End Function RemoveCstyleComments

        // Rewrites the (comment-free) script text step by step into one JSON object text:
        // the two "var ... = {" declarations become quoted members, bare keys are quoted,
        // single quotes become double quotes, the trailing openid.* statements are cut off,
        // and the result is wrapped in an outer { }.
        static string ReplaceVariables(string strInput)
        // "var providers_large = {" -> "\"providers_large\" : {" followed by a line break.
        string strPattern = @"var\s+providers_large(\s+)?=(\s+)?{(\s+)?";
        strInput = System.Text.RegularExpressions.Regex.Replace(strInput, strPattern, "\"providers_large\" : {" + Environment.NewLine, System.Text.RegularExpressions.RegexOptions.Multiline);

        // Same for providers_small; the leading comma separates it from the previous member.
        strPattern = @"(\s+)?var\s+providers_small(\s+)?=(\s+)?{(\s+)?";
        strInput = System.Text.RegularExpressions.Regex.Replace(strInput, strPattern, ",   \"providers_small\" : {" + Environment.NewLine, System.Text.RegularExpressions.RegexOptions.Multiline);

        // Drop the ';' (and surrounding whitespace) that follows a closing brace.
        strPattern = @"}(\s+)?;(\s+)?";
        strInput = System.Text.RegularExpressions.Regex.Replace(strInput, strPattern, "}" + Environment.NewLine, System.Text.RegularExpressions.RegexOptions.Multiline);

        // Put double quotes around a bare key that opens a nested object ($2 is the key).
        strPattern = @"$(\s+)?(\w+)(\s+)?:(\s+)?{";
        strInput = System.Text.RegularExpressions.Regex.Replace(strInput, strPattern, "\"$2\" : {", System.Text.RegularExpressions.RegexOptions.Multiline);

        // Quote the name/url/label keys. These patterns require the value to start with a
        // single quote, so an unquoted value (e.g. null) leaves its key as it was.
        strPattern = @"name(\s+)?:(\s+)?'";
        strInput = System.Text.RegularExpressions.Regex.Replace(strInput, strPattern, "\"name\" : '", System.Text.RegularExpressions.RegexOptions.Multiline);

        strPattern = @"url(\s+)?:(\s+)?'";
        strInput = System.Text.RegularExpressions.Regex.Replace(strInput, strPattern, "\"url\" : '", System.Text.RegularExpressions.RegexOptions.Multiline);

        strPattern = @"label(\s+)?:(\s+)?'";
        strInput = System.Text.RegularExpressions.Regex.Replace(strInput, strPattern, "\"label\" : '", System.Text.RegularExpressions.RegexOptions.Multiline);

        // Every single quote in the text becomes a double quote, including any inside values.
        strInput = strInput.Replace("'", "\"");

        // Remove everything from the first "openid.locale" onwards; Singleline is used here
        // so that '.' also runs across line breaks.
        strPattern = "openid\\.locale.*";
        //strInput = System.Text.RegularExpressions.Regex.Replace(strInput, strPattern, "", System.Text.RegularExpressions.RegexOptions.Multiline);
        strInput = System.Text.RegularExpressions.Regex.Replace(strInput, strPattern, "", System.Text.RegularExpressions.RegexOptions.Singleline);

        strPattern = null;

        // NOTE(review): the contents and the closing of this array initializer are not visible
        // in this listing.
        string[] astrTrailingComments = {

        // For each pattern, remove its match together with the characters matched by ".+" after it.
        foreach (string strThisPattern in astrTrailingComments)
            strInput = System.Text.RegularExpressions.Regex.Replace(strInput, strThisPattern + ".+", "", System.Text.RegularExpressions.RegexOptions.Multiline);
        } // Next strThisPattern

        // Wrap the two members in an outer object.
        strInput = "{" + strInput + "}";

        return strInput;
    } // End Function ReplaceVariables

        // Converts the script text to JSON (RemoveCstyleComments, then ReplaceVariables),
        // deserializes it with JavaScriptSerializer and returns a NameValueCollection that
        // holds the member keys found, stored under "providers_small" / "providers_large".
        static System.Collections.Specialized.NameValueCollection TrySerialize(string strInput)
            strInput = RemoveCstyleComments(strInput);
            strInput = ReplaceVariables(strInput);

            // Created with StringComparer.OrdinalIgnoreCase as its comparer.
            System.Collections.Specialized.NameValueCollection nvc = new System.Collections.Specialized.NameValueCollection(StringComparer.OrdinalIgnoreCase);

            System.Web.Script.Serialization.JavaScriptSerializer js = new System.Web.Script.Serialization.JavaScriptSerializer();
            // dynamic: the deserialized object graph is walked without compile-time types.
            dynamic objScript = js.DeserializeObject(strInput);
            js = null;

            // One iteration per top-level entry of the deserialized object.
            foreach (dynamic kvp in objScript)
                dynamic dictValues = kvp.Value;


                foreach (string strMemberVariable in dictValues.Keys)

                    // NOTE(review): whatever condition selects between the next two Add calls
                    // is not visible in this listing. What gets stored is the member key
                    // (strMemberVariable), not the value of a "name" property.
                        nvc.Add("providers_small", strMemberVariable);

                        nvc.Add("providers_large", strMemberVariable);

                    //Console.WriteLine(strMemberVariable + ":");

                    dynamic MemberVariable = dictValues[strMemberVariable];

                    // Walks the properties of the member; the value is fetched but only used
                    // by the commented-out debug output below.
                    foreach (string strProperty in MemberVariable.Keys)
                        dynamic objPropertyValue = MemberVariable[strProperty];

                        //if (objPropertyValue != null)
                        //Console.WriteLine("     - " + (strProperty + ":").PadRight(8, ' ') + objPropertyValue.ToString());
                    } // Next strProperty

                } // Next strMemberVariable

            } // Next kvp

            // Console.WriteLine("providers large: ");
            // Console.WriteLine(nvc["providers_large"]);

            // Console.WriteLine(Environment.NewLine);
            // Console.WriteLine("providers small: ");
            // Console.WriteLine(nvc["providers_small"]);

            return nvc;
        } // End Function TrySerialize

        /// <summary>
        /// Reads the openid-selector localisation script, runs it through <c>TrySerialize</c>
        /// and prints the entries collected under "providers_large" and "providers_small".
        /// </summary>
        /// <remarks>
        /// The script path is hard-coded. A group without entries prints only its heading.
        /// </remarks>
        public static void GetProviders()
        {
            // Only one file is read. Point this at openid-en.js to process the English script.
            string strContent = System.IO.File.ReadAllText(@"D:\UserName\Downloads\openid-selector-1.3\openid-selector\js\openid-ru.js");

            System.Collections.Specialized.NameValueCollection nvc = TrySerialize(strContent);

            Console.WriteLine("providers large: ");
            // GetValues returns null when the key has no entries; fall back to an empty array
            // so the loop does not throw.
            foreach (string strValue in nvc.GetValues("providers_large") ?? new string[0])
            {
                Console.WriteLine("    " + strValue);
            } // Next strValue

            Console.WriteLine("providers small: ");
            foreach (string strValue in nvc.GetValues("providers_small") ?? new string[0])
            {
                Console.WriteLine("    " + strValue);
            } // Next strValue

        } // End Sub GetProviders
Stefan Steiger
  • 68,404
  • 63
  • 337
  • 408