NetProgrammingHelp.com
ASP.NET, C#, Ajax, SQL Server, Silverlight, JavaScript code examples, articles, programming examples

Recent Posts

  • Silverlight Data Binding OneWay TwoWay Asp.Net C# XAML
  • Accessing Executing Running Stored Procedure/Function/Package Using Linq Silverlight Xaml
  • Creating/Developing Silverlight Data Driven Applications Using Wcf Linq Asp.Net C#
  • Deploying Manually Publish XBAP Wpf Browser Application in IIS Asp.Net C# XAML
  • Creating Server and client using remoting asp.net c#
  • Create/Build wcf autocomplete/autoextender textbox using database linq c# asp.net
  • Essentials for Creating/Developing/Programming silverlight project tools wpf xaml
  • Working with xml data type variable sql server 2008 asp.net c#
  • Encrypting Stored Procedure sql server 2008 asp.net c#
  • sql server build-in functions aggregate functions asp.net c#
  • Convert Html to Text using JavaScript

    Posted by James Categorized Under: JAVASCRIPT one Commented

    Introduction:
    This article shows how to convert the HTML content of an element into plain text by using JavaScript.

    Main:

    To convert HTML into text, follow the steps below.

    1. Find the HTML tag values by using regex:

    /**
     * Returns the plain-text content of the element with the given id.
     *
     * Uses the element's innerText when the browser provides it. Otherwise it
     * falls back to innerHTML: every </li> becomes a newline, all remaining
     * tags are stripped, and the &nbsp;, &lt;, &gt; and &amp; entities are
     * decoded.
     *
     * NOTE(review): strTrim is not defined in this block; it is presumably a
     * whitespace-trimming helper declared elsewhere - confirm.
     *
     * @param {string} sID - id of the element to read.
     * @returns {string} The extracted text, or "" when no element is found.
     */
    function getTagCode(sID) {
    	var oDoc = null;	//declared locally so it does not leak as a global
    	if(document.getElementById) {
    		oDoc = document.getElementById(sID);
    	} else if(document.all) {
    		oDoc = document.all[sID];
    	}
    	if(!oDoc) {
    		return "";	//nothing to convert when the element does not exist
    	}
    	if(typeof(oDoc.innerText) !== 'undefined') {
    		return strTrim(oDoc.innerText);
    	}
    	//textContent doesn't keep \n with <LI>, so use innerHTML
    	var getTxt = strTrim(oDoc.innerHTML);
    	getTxt = getTxt.replace(/<\/li>/gi, "\n");	//replace </li> with \n
    	getTxt = getTxt.replace(/<\S[^>]*>/g, "");	//strip out all HTML tags
    	getTxt = getTxt.replace(/&nbsp;/g, " ");	//replace &nbsp; with simple whitespace
    	getTxt = getTxt.replace(/&lt;/g, "<");		//replace &lt; with <
    	getTxt = getTxt.replace(/&gt;/g, ">");		//replace &gt; with >
    	//&amp; must be decoded last; decoding it first would turn the literal
    	//text "&amp;lt;" into "&lt;" and then wrongly into "<"
    	getTxt = getTxt.replace(/&amp;/g, "&");
    	return getTxt;
    }

    2. Encode the HTML tag values into a string:

    /**
     * Escapes text so it can be written into an HTML document and displayed
     * literally.
     *
     * @param {string} igHTML - raw text that may contain &, < or >.
     * @returns {string} The text with & replaced by &amp;, < by &lt; and
     *                   > by &gt;.
     */
    function igEncodeHTML(igHTML) {
    	//& must be escaped first; otherwise literal text such as "&lt;" would
    	//be rendered as "<", and escaping it later would corrupt the "&lt;"
    	//and "&gt;" sequences produced below
    	igHTML = igHTML.replace(/&/g, "&amp;");
    	igHTML = igHTML.replace(/</g, "&lt;");
    	igHTML = igHTML.replace(/>/g, "&gt;");
    	return igHTML;
    }

    3. Clean up the string values by using regex:

    /**
     * Tidies up a block of text for plain-text display.
     *
     * - normalizes \r\n and \r line endings to \n
     * - removes trailing spaces and tabs from every line
     * - drops lines that are empty or contain only spaces/tabs
     * - replaces every run of 4 space characters with a tab
     *
     * @param {string} sTxt - the text to clean up.
     * @returns {string} The cleaned text, lines joined with \n, without a
     *                   leading or trailing newline.
     */
    function doCleanUp(sTxt) {
    	var rawLines = sTxt.replace(/\r\n|\r/g, "\n").split("\n");
    	var cleanLines = [];
    	for(var i = 0; i < rawLines.length; i++) {	//i is local, not an implicit global
    		//strip only spaces and tabs: a "|" inside the character class
    		//would also delete pipes at the end of a line (e.g. "a ||")
    		var line = rawLines[i].replace(/[ \t]+$/, "");
    		if(line !== "") {
    			cleanLines.push(line.replace(/ {4}/g, "\t"));
    		}
    	}
    	//blank lines were skipped above, so the joined text cannot start or
    	//end with a newline or contain consecutive newlines
    	return cleanLines.join("\n");
    }

    4. Display the resulting text in a new plain-text window:

    /**
     * Opens a popup window showing the text of the element with the given id.
     *
     * The text is read with getTagCode, escaped with igEncodeHTML, tidied
     * with doCleanUp and written into a minimal HTML page in the popup.
     * The popup handle is stored in the global cdWin.
     *
     * @param {string} sId - id of the element whose content is displayed.
     */
    function showCodeTxt(sId) {
    	var cdTxt = doCleanUp(igEncodeHTML(getTagCode(sId)));
    	var cdTxtPrefix = "<html><head><title>NetProgrammingHelp &raquo; Plain-Text View</title><style>body { margin:0px; padding:0px; white-space:nowrap; }</style></head><body>\n";
    	var cdTxtSuffix = "\n<br /></body></html>";
    	var cdWin = window.open("about:blank", "cdWin", "toolbar=0,scrollbars=1,location=0,statusbar=0,menubar=0,resizable=1,width=700,height=400,left=35,top=85");
    	//keep the handle reachable as a global, but assign it explicitly
    	//instead of relying on an undeclared variable
    	window.cdWin = cdWin;
    	if(!cdWin) {
    		return;	//window.open yields null when the popup is blocked
    	}
    	cdWin.document.open();
    	cdWin.document.write(cdTxtPrefix+cdTxt+cdTxtSuffix);
    	cdWin.document.close();
    }

    Conclusion:
    Hope this helps,
    Happy Coding.

    One Response to “Convert Html to Text using JavaScript ”

    1. Usually I do not post on blogs, but I would like to say that this article really forced me to do so! Thanks, really nice article.

    Leave a Reply