2
// Sample email body: contains tokens in the old NewListName:"Value" form and one in the new "NewListName:Value" form.
string emailBody = "sample text for NewFinancial History:\"xyz\"  text NewFinancial History:\"abc\"  NewEBTDI$:\"abc\" ds \"NewFinancial History:pqr\" test";

/// <summary>
/// Scans an email body for tokens of the form <c>NewListName:"Value"</c> and groups the
/// quoted values by list name.
/// </summary>
/// <param name="emailBody">Email text to scan.</param>
/// <returns>
/// A dictionary whose keys are the list names (the matched key without its leading
/// "New" prefix, trimmed) and whose values are the distinct, trimmed values found for
/// that list. If an exception occurs it is logged and whatever was collected up to
/// that point is returned.
/// </returns>
private Dictionary<string, List<string>> ExtractFieldValuesForDynamicListObject(string emailBody)
{
    const string keyPrefix = "New";
    Dictionary<string, List<string>> paramValueList = new Dictionary<string, List<string>>();
    try
    {
        emailBody = ReplaceIncompatableQuotes(emailBody);
        // Replace every line break with a space so the body is scanned as one line.
        emailBody = string.Join(" ", Regex.Split(emailBody.Trim(), @"(?:\r\n|\n|\r)"));
        // A key is "New" immediately followed by a word character, taken lazily up to the next colon.
        var keys = Regex.Matches(emailBody, @"\bNew\B(.+?):", RegexOptions.Singleline).OfType<Match>().Select(m => m.Groups[0].Value.Replace(":", "")).Distinct().ToArray();
        foreach (string key in keys)
        {
            // Computed once per key: the same name builds the pattern and reads the group back.
            // NOTE(review): assumes GetCleanKey returns the same result for the same key - confirm.
            string groupName = Regex.Escape(GetCleanKey(key));
            string regex = Regex.Escape(key) + ":\"(?<" + groupName + ">[^\"]*)\"";

            List<string> valueList = Regex.Matches(emailBody, regex, RegexOptions.Singleline)
                .OfType<Match>()
                .Select(match => match.Groups[groupName].Value.Trim())
                .Distinct()
                .ToList();

            // Strip only the leading prefix; string.Replace would also delete any
            // "New" that occurs inside the list name itself (e.g. "NewNewsletter").
            string listName = key.StartsWith(keyPrefix, StringComparison.Ordinal)
                ? key.Substring(keyPrefix.Length)
                : key;
            listName = listName.Trim();

            List<string> existingValues;
            if (paramValueList.TryGetValue(listName, out existingValues))
            {
                // Distinct keys can trim down to the same list name (e.g. "NewFoo" and "NewFoo ").
                // Merge instead of calling Add, which would throw and abort the remaining keys.
                foreach (string value in valueList)
                {
                    if (!existingValues.Contains(value))
                    {
                        existingValues.Add(value);
                    }
                }
            }
            else
            {
                paramValueList.Add(listName, valueList);
            }
        }
    }
    catch (Exception ex)
    {
        DCULSLogger.LogError(ex);
    }
    return paramValueList;
}

My goal here is to scan through the email body and identify the strings that follow the NewListName:"Value" nomenclature, and this is working perfectly fine using the above regex and method. Now my client has changed the nomenclature from NewListName:"Value" to "NewListName:Value". I want to grab the whole text between the double quotes, including the New keyword and the colon. So I need to look for an opening quote followed by New, and then the closing quote. Can anyone help me modify the above regex to scan through the email body and get the list of all values between double quotes? In the above example, I want to grab \"NewFinancial History:pqr\" in my results. Any help would be appreciated.

Peter Duniho
  • 62,751
  • 5
  • 84
  • 120
Savan Patel
  • 307
  • 2
  • 12

1 Answers1

1

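You may use a regex that matches a double quote, then New, then one or more characters other than a double quote or a colon, then a colon, and then one or more characters other than a double quote, up to the closing double quote: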

// Find every quoted "New<name>:<value>" token; each match includes the surrounding quotes.
MatchCollection quotedTokens = Regex.Matches(emailBody, @"""New[^"":]+:[^""]+""", RegexOptions.Singleline);

// Keep the distinct matched strings, in order of first appearance.
var keys = quotedTokens
       .Cast<Match>()
       .Select(token => token.Value)
       .Distinct()
       .ToArray();

See the regex demo

enter image description here

Pattern details:

  • " - a literal double quote
  • New - a literal substring
  • [^":]+ - 1 or more characters other than " and : (the [^...] is a negated character class)
  • : - a literal colon
  • [^"]+ - 1 or more characters other than "
  • " - a literal double quote
Wiktor Stribiżew
  • 484,719
  • 26
  • 302
  • 397