Introduction:
This article shows how to export the content of an HTML element as plain text using JavaScript.
Main:
To convert HTML into text, follow the steps below.
1. Read the HTML element's content and strip its tags using regular expressions:
/**
 * Returns the text of the element whose id is sID.
 *
 * When the element exposes innerText, that value is used. Otherwise the
 * element's innerHTML is converted by hand: every closing </li> becomes a
 * line break, all remaining tags are removed, and the &nbsp;, &lt;, &gt; and
 * &amp; entities are decoded.
 *
 * Relies on strTrim(), a helper that is not defined in this snippet
 * (presumably it trims surrounding whitespace -- confirm against its definition).
 *
 * @param {string} sID - id of the element to read.
 * @returns {string} The extracted text, or "" when no element has that id.
 */
function getTagCode(sID) {
  // Declared locally: assigning an undeclared name creates a global and
  // throws a ReferenceError in strict mode / ES modules.
  var oDoc = null;
  if (document.getElementById) {
    oDoc = document.getElementById(sID);
  } else if (document.all) {
    // Fallback for browsers that lack getElementById.
    oDoc = document.all[sID];
  }

  // Nothing to convert when the lookup found no element.
  if (!oDoc) {
    return "";
  }

  if (typeof oDoc.innerText !== "undefined") {
    return strTrim(oDoc.innerText);
  }

  // textContent doesn't keep \n with <LI>, so use innerHTML
  return strTrim(oDoc.innerHTML)
    .replace(/<\/li>/gi, "\n") // replace </li> with \n
    .replace(/<\S[^>]*>/g, "") // strip out all HTML tags
    .replace(/&nbsp;/g, " ") // non-breaking space entity -> plain space
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    // &amp; is decoded last so that "&amp;lt;" yields the text "&lt;", not "<".
    .replace(/&amp;/g, "&");
}
2. Encode the HTML special characters in the extracted text so they display as literal characters:
/**
 * Escapes the characters that are significant in HTML markup so the text can
 * be written into a document and shown literally.
 *
 * @param {string} igHTML - raw text to escape.
 * @returns {string} The text with &, < and > replaced by &amp;, &lt; and
 *     &gt;; "" when igHTML is null or undefined.
 */
function igEncodeHTML(igHTML) {
  if (igHTML === null || igHTML === undefined) {
    return "";
  }
  return String(igHTML)
    // & must be escaped first, otherwise the ampersands produced by the
    // next two replacements would be escaped a second time.
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}
3. Clean up the whitespace and line breaks in the string using regular expressions:
/**
 * Normalizes line breaks and whitespace in a block of text.
 *
 * Steps, in order:
 *   1. Convert \r\n and \r line endings to \n.
 *   2. Remove up to two trailing space characters from every line.
 *   3. Replace every run of four spaces with a single space.
 *   4. Remove spaces and tabs that sit directly before a line break.
 *   5. Collapse consecutive line breaks into one (this removes empty lines).
 *   6. Drop one leading and one trailing line break, if present.
 *
 * @param {string} sTxt - text to clean up.
 * @returns {string} The cleaned text; "" when sTxt is null or undefined.
 */
function doCleanUp(sTxt) {
  if (sTxt === null || sTxt === undefined) {
    return "";
  }

  var lines = String(sTxt).replace(/\r\n|\r/g, "\n").split("\n");
  // Loop index declared locally: an undeclared `i` is an implicit global and
  // throws a ReferenceError in strict mode / ES modules.
  var idx;
  for (idx = 0; idx < lines.length; idx++) {
    // Strips at most two trailing spaces (this also applies to the last
    // line, which step 4 does not reach because no line break follows it).
    lines[idx] = lines[idx].replace(/ {1,2}$/, "");
  }
  var text = lines.join("\n");

  text = text.replace(/ {4}/g, " ");
  // [ \t] rather than [ |\t]: inside a character class "|" is a literal
  // pipe, so the old pattern deleted a "|" at the end of a line.
  // Whitespace-only lines become empty here and are removed by the collapse
  // below, so no separate "blank line" patterns are needed.
  text = text.replace(/[ \t]+\n/g, "\n");
  text = text.replace(/\n{2,}/g, "\n");

  if (text.charAt(0) === "\n") {
    text = text.slice(1);
  }
  if (text.charAt(text.length - 1) === "\n") {
    text = text.slice(0, -1);
  }
  return text;
}
4. Show the resulting text in a new window, from which it can be copied into a text file:
/**
 * Opens a pop-up window showing the text of the element with id sId.
 *
 * The text is obtained with getTagCode(), escaped with igEncodeHTML(),
 * tidied with doCleanUp(), wrapped in a minimal HTML page and written into a
 * window named "cdWin".
 *
 * @param {string} sId - id of the element whose text should be shown.
 * @returns {undefined} Nothing; does nothing further when the window cannot
 *     be opened.
 */
function showCodeTxt(sId) {
  var cdTxt = doCleanUp(igEncodeHTML(getTagCode(sId)));

  // NOTE(review): the page uses white-space:nowrap, under which the "\n"
  // separators in cdTxt are rendered as spaces -- confirm whether line breaks
  // are meant to be visible (white-space:pre would keep them).
  var cdTxtPrefix = "<html><head><title>NetProgrammingHelp » Plain-Text View</title><style>body { margin:0px; padding:0px; white-space:nowrap; }</style></head><body>\n";
  var cdTxtSuffix = "\n<br /></body></html>";

  // Declared locally: assigning an undeclared `cdWin` creates a global and
  // throws a ReferenceError in strict mode / ES modules.
  var cdWin = window.open("about:blank", "cdWin", "toolbar=0,scrollbars=1,location=0,statusbar=0,menubar=0,resizable=1,width=700,height=400,left=35,top=85");

  // window.open returns null when the pop-up is blocked; bail out instead of
  // failing on cdWin.document.
  if (!cdWin) {
    return;
  }

  cdWin.document.open();
  cdWin.document.write(cdTxtPrefix + cdTxt + cdTxtSuffix);
  cdWin.document.close();
}
Conclusion:
Hope this helps,
Happy Coding.
Usually I do not post on blogs, but I would like to say that this article really forced me to do so! Thanks, really nice article.