Re: Clean Up HTML on the server side

  •  10-16-2009, 5:09 PM

    Re: Clean Up HTML on the server side

    Is there a way to call the codeCleaner('Word') function on the server side, instead of calling CleanCode from the Toolbar?
     
    I tried using your regex function and it still saved a lot of word formatting junk.  Surely your codeCleaner function is more robust than the regex function you provided.
     
    Here is my version of the regex function in C#:
     
    /// <summary>
    /// Strips Microsoft Word-specific markup from an HTML fragment: XML
    /// declarations, <c>mso-*</c> style declarations, namespaced tags such as
    /// <c>&lt;o:p&gt;</c>, HTML comments, empty or nbsp-only spans, and
    /// elements styled with <c>display:none</c>. Typographic quotes are
    /// replaced with their plain ASCII equivalents.
    /// </summary>
    /// <param name="html">The HTML to clean. May be null or empty.</param>
    /// <returns>
    /// The cleaned HTML, or an empty string when <paramref name="html"/> is
    /// null or empty.
    /// </returns>
    public static string CleanWordHtml(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return string.Empty;
        }

        const System.Text.RegularExpressions.RegexOptions ignoreCase =
            System.Text.RegularExpressions.RegexOptions.IgnoreCase;

        // Singleline makes '.' match line breaks as well; Word comments and
        // hidden elements routinely span several lines.
        const System.Text.RegularExpressions.RegexOptions ignoreCaseMultiLine =
            System.Text.RegularExpressions.RegexOptions.IgnoreCase |
            System.Text.RegularExpressions.RegexOptions.Singleline;

        string cleaned = html;

        // <?xml ...>, <xml ...>, </xml> (a leading backslash is tolerated too).
        // One pattern covers both xml passes of the earlier version.
        cleaned = System.Text.RegularExpressions.Regex.Replace(
            cleaned, @"<[\\/]?\??xml[^>]*>", "", ignoreCase);

        // mso-* style declarations. The value stops at ';' or at either kind
        // of quote so it cannot run past the end of a style='...' attribute.
        cleaned = System.Text.RegularExpressions.Regex.Replace(
            cleaned, @"\s*mso-[^:]+:[^;""']+;?", "", ignoreCase);

        // Namespaced tags such as <o:p>, </o:p>, <w:WordDocument>.
        cleaned = System.Text.RegularExpressions.Regex.Replace(
            cleaned, @"</?\w+:[^>]*>", "", ignoreCase);

        // HTML comments. Lazy so that the text between two separate comments
        // is preserved.
        cleaned = System.Text.RegularExpressions.Regex.Replace(
            cleaned, @"<!--.*?-->", "", ignoreCaseMultiLine);

        // Curly double quotes -> straight double quote. Written as \u escapes
        // so the result does not depend on the source file's encoding.
        cleaned = System.Text.RegularExpressions.Regex.Replace(
            cleaned, "[\u201C\u201D]", "\"", ignoreCase);

        // Curly single quotes -> apostrophe.
        cleaned = System.Text.RegularExpressions.Regex.Replace(
            cleaned, "[\u2018\u2019]", "'", ignoreCase);

        // Spans that wrap nothing but a non-breaking space collapse to it.
        cleaned = System.Text.RegularExpressions.Regex.Replace(
            cleaned, @"<span\s*[^>]*>\s*&nbsp;\s*</span>", "&nbsp;", ignoreCase);

        // Completely empty spans.
        cleaned = System.Text.RegularExpressions.Regex.Replace(
            cleaned, @"<span\s*[^>]*></span>", "", ignoreCase);

        // Elements hidden with display:none, removed up to the first closing
        // tag of the same name.
        cleaned = System.Text.RegularExpressions.Regex.Replace(
            cleaned,
            @"<(\w+)[^>]*\sstyle=""[^""]*DISPLAY\s?:\s?none(.*?)</\1>",
            "",
            ignoreCaseMultiLine);

        return cleaned;
    }
View Complete Thread