Sample - HTML filter to only keep basic format

  •  04-10-2013, 11:01 PM

    Sample - HTML filter to only keep basic format

    I thought I would share this for anyone looking to do something similar.

    The idea here is to be able to paste in content and keep only basic formatting such as new lines, bold, italic, underline and lists.  Anything else, such as fonts, font sizes, links, etc., is dropped.

    This allows content from multiple sources to be easily combined without playing with formatting.

    The code is cobbled together from various sources and I'm pretty new to JavaScript and DOM so I wouldn't be surprised if there are better ways to do this.

    Cheers

    /**
     * Paste filter: reduces pasted HTML to basic formatting only.
     *
     * Kept tags (always emitted lowercase and without attributes): ol, ul, li,
     * br, p, b, i, u, dl, dd, dt, div.  `strong` is emitted as `b`; `em` and
     * `emphasis` are emitted as `i`.  Every other tag is removed while its text
     * content is kept; `script`/`style` elements and HTML comments are removed
     * together with their content.
     *
     * When an XML parser is available and the pasted markup is well-formed,
     * inline `style` declarations for bold, underline and italic are first
     * converted to real `b` / `u` / `i` elements so that the formatting
     * survives the attribute stripping.
     *
     * @param {*} rteeditor - Not used by this filter (presumably the editor
     *     instance supplied by the caller).
     * @param {{Arguments: Array, ReturnValue: *}} info - `info.Arguments[0]` is
     *     read as the pasted HTML; the filtered HTML string is written to
     *     `info.ReturnValue`.
     * @returns {void}
     */
    function RichTextEditor_OnPasteFilter(rteeditor, info) {
        // Tags that survive the filter, mapped to the tag name that is emitted.
        var KEPT_TAGS = {
            ol: "ol", ul: "ul", li: "li", br: "br", p: "p",
            b: "b", strong: "b",
            i: "i", em: "i", emphasis: "i",
            u: "u",
            dl: "dl", dd: "dd", dt: "dt",
            div: "div"
        };

        // Parse markup as XML; returns the document, or null when no parser is
        // available or the markup is not well-formed.
        function parseAsXml(markup) {
            var doc;
            try {
                doc = new ActiveXObject("Microsoft.XMLDOM");
                doc.async = false;
                doc.loadXML(markup);
                // A failed load leaves the document empty.
                return doc.hasChildNodes() ? doc : null;
            } catch (activeXUnavailable) {
                // Deliberate best effort: not running in IE, try the standard parser.
            }
            try {
                doc = new DOMParser().parseFromString(markup, "text/xml");
                if (doc.documentElement && doc.getElementsByTagName("parsererror").length === 0) {
                    return doc;
                }
            } catch (domParserUnavailable) {
                // Deliberate best effort: the caller falls back to text-only filtering.
            }
            return null;
        }

        // Serialize a document produced by parseAsXml back to a string.
        function serializeXml(doc) {
            return typeof doc.xml === "string" ? doc.xml : new XMLSerializer().serializeToString(doc);
        }

        // Move all children of `element` into a new <tagName> child element.
        function wrapChildren(doc, element, tagName) {
            var wrapper;
            if (!element.firstChild) {
                return; // nothing to format
            }
            wrapper = doc.createElement(tagName);
            while (element.firstChild) {
                wrapper.appendChild(element.firstChild);
            }
            element.appendChild(wrapper);
        }

        // Recursively turn style-based bold/underline/italic into b/u/i elements.
        function promoteInlineStyles(doc, node) {
            var style;
            var child;
            var next;
            if (node.nodeType === 1) { // element nodes only
                style = node.getAttribute("style");
                if (style) {
                    // Drop ALL whitespace so "font-weight : bold" also matches.
                    style = style.toLowerCase().replace(/\s+/g, "");
                    // bold, bolder, 700, 800 or 900
                    if (/font-weight:(?:bold|[789]00)/.test(style)) {
                        wrapChildren(doc, node, "b");
                    }
                    if (/text-decoration(?:-line)?:[^;]*underline/.test(style)) {
                        wrapChildren(doc, node, "u");
                    }
                    if (/font-style:italic/.test(style)) {
                        wrapChildren(doc, node, "i");
                    }
                }
            }
            child = node.firstChild;
            while (child) {
                next = child.nextSibling;
                promoteInlineStyles(doc, child);
                child = next;
            }
        }

        // Replacement callback: returns the normalized tag for kept tags, "" otherwise.
        function filterTag(tag) {
            var parts = /^<(\/?)([a-z][a-z0-9]*)(?=[\s\/>])/i.exec(tag);
            var name;
            var kept;
            var opening;
            if (!parts) {
                return ""; // doctype, namespaced tags such as <o:p>, stray "<...>"
            }
            name = parts[2].toLowerCase();
            // hasOwnProperty guard: "<constructor>" must not hit Object.prototype.
            if (!Object.prototype.hasOwnProperty.call(KEPT_TAGS, name)) {
                return "";
            }
            kept = KEPT_TAGS[name];
            if (kept === "br") {
                return parts[1] ? "" : "<br>";
            }
            if (parts[1]) {
                return "</" + kept + ">";
            }
            opening = kept === "ul" ? "<ul style='list-style-type:disc'>" : "<" + kept + ">";
            // XML serialization writes empty elements as <p/>; keep them balanced.
            return /\/\s*>$/.test(tag) ? opening + "</" + kept + ">" : opening;
        }

        var pasted = info.Arguments[0];
        var html = (pasted === null || pasted === undefined) ? "" : String(pasted);

        // The <span> wrapper gives the XML parser a single root element; it is
        // never written back to `html` unless parsing succeeded, and the tag
        // filter below drops it in that case.
        var xmlDoc = parseAsXml("<span>" + html + "</span>");
        if (xmlDoc) {
            promoteInlineStyles(xmlDoc, xmlDoc.documentElement);
            html = serializeXml(xmlDoc);
        }

        //-- remove comments, then SCRIPT and STYLE elements including their content
        //   (non-greedy, one element at a time; self-closed forms are left to the tag filter)
        html = html.replace(/<!--[\s\S]*?-->/g, "");
        html = html.replace(/<(script|style)\b(?:[^>]*[^>\/])?>[\s\S]*?<\/\1\s*>/gi, "");

        //-- single pass over every tag: normalize the ones to keep, drop all others.
        //   No placeholder round-trip, so literal "&lt;b&gt;" text stays escaped.
        html = html.replace(/<[^>]*>/g, filterTag);

        //-- get rid of more than 2 multiple line breaks:
        html = html.replace(/(?:(?:\r\n|\r|\n)\s*){2,}/g, "\n\n");

        //-- get rid of more than 2 spaces:
        html = html.replace(/ {3,}/g, "  ");

        info.ReturnValue = html;
    }

View Complete Thread