Visit Mozilla.org

Code snippets:HTML to DOM

From MDC

Contents


[edit] Using a hidden browser element to parse HTML to a window's DOM

This sample code may need more work. Wrap it in your own function, and give the browser element a unique name and id.

 // Parse a remote page into a DOM by loading it in a zero-height <browser>
 // element. Reuse the element if one with this id already exists.
 var browser = document.getElementById("sample-browser");
 if (!browser) {
   // create browser
   browser = document.createElement("browser");
   browser.setAttribute("id", "sample-browser");
   browser.setAttribute("name", "sample-browser");
   browser.setAttribute("type", "content");
   // zero height so the element takes up no visible space
   browser.style.height = "0px";
   document.documentElement.appendChild(browser);
   // set restrictions as needed
   browser.webNavigation.allowAuth = true;
   browser.webNavigation.allowImages = false;
   browser.webNavigation.allowJavascript = false;
   browser.webNavigation.allowMetaRedirects = true;
   browser.webNavigation.allowPlugins = false;
   browser.webNavigation.allowSubframes = false;
 }

 // listen for load (registered in the capture phase)
 browser.addEventListener("load", function onBrowserLoad(event) {
   // the document of the HTML in the DOM
   var doc = event.originalTarget;
   // A capturing listener can receive load events whose target is not a
   // document; ignore those and keep waiting for the page itself.
   if (!doc || doc.nodeName !== "#document") {
     return;
   }
   // Stop listening before cleaning up so a later load event cannot run
   // the removal a second time.
   browser.removeEventListener("load", onBrowserLoad, true);
   // do something with the DOM of doc
   // remove browser element when done; use its real parent, because a
   // pre-existing element is not necessarily a child of documentElement
   if (browser.parentNode) {
     browser.parentNode.removeChild(browser);
   }
 }, true);
 // load a page
 browser.loadURI("http://www.mozilla.org/");

// // Alternatively, write into the browser's about:blank document in the DOM
// // (works with Firefox 2 and earlier)
// var doc = browser.contentDocument;
// // write in new HTML from a string
// doc.open();
// doc.write("Some html string");
// doc.close(); 
// // do something with the DOM of doc   
// // remove browser element when done
// document.documentElement.removeChild(browser);

If you are starting with an HTML string, you can convert it to a data: URL and load that URL in the browser element. This also works with Firefox 3.
data: url MDC
data: url Wikipedia

[edit] Using a hidden XUL iframe (complete example)

Sometimes a browser element is overkill, does not meet your needs, or has requirements you can't fulfill. While working on Donkeyfire, I discovered the iframe XUL element, which is very easy to use.

As an example, I will show a browser overlay .xul file, and some JS code to access it.

Here is some XUL code you can add to your browser overlay .xul file. Don't forget to modify the id and name!

<!-- Zero-height container for the content iframe that pages are loaded into.
     Both elements keep hidden="false" and rely on height="0" instead.
     The script looks the iframe up by its id ("donkey-browser"). -->
<vbox hidden="false" height="0">
		<iframe type="content" src="" name="donkey-browser" hidden="false" id="donkey-browser" height="0"/>
</vbox>

Then, in your extension's "load" event handler:

onLoad: function() {
	donkeybrowser = document.getElementById("donkey-browser");
	if (donkeybrowser) {
		donkeybrowser.style.height = "0px";
		donkeybrowser.webNavigation.allowAuth = true;
		donkeybrowser.webNavigation.allowImages = false;
		donkeybrowser.webNavigation.allowJavascript = false;
		donkeybrowser.webNavigation.allowMetaRedirects = true;
		donkeybrowser.webNavigation.allowPlugins = false;
		donkeybrowser.webNavigation.allowSubframes = false;
		donkeybrowser.addEventListener("DOMContentLoaded", function (e) { donkeyfire.donkeybrowser_onPageLoad(e); }, true);
	}


With that code, we obtain a reference to the iframe element we declared in the .xul file. The most interesting piece of code here is the DOMContentLoaded event listener we define for the element. Let's take a look at the donkeyfire.donkeybrowser_onPageLoad() handler:

donkeybrowser_onPageLoad: function(aEvent) {
	var doc = aEvent.originalTarget;
	var url = doc.location.href;
	if (aEvent.originalTarget.nodeName == "#document") { // ok, it's a real page, let's do our magic
		dump("[DF] URL = "+url+"\n");
		var text = doc.evaluate("/html/body/h1",doc,null,XPathResult.STRING_TYPE,null).stringValue;
		dump("[DF] TEXT in /html/body/h1 = "+text+"\n");
	}
},

As you can see, we obtain full access to the DOM of the page we loaded in background, and we can even evaluate XPath expressions. In the example, we dump() to the console the page's URL and the text contained in the first H1 tag of the page's BODY.

But, we still need to see how to execute the famous loadURI() method using our iframe:

// The second argument is the numeric load-flags value, not the interface
// itself; LOAD_FLAGS_NONE requests a normal load. The remaining arguments
// (referrer, post data, headers) are not used here.
donkeybrowser.webNavigation.loadURI("http://developer.mozilla.org",Components.interfaces.nsIWebNavigation.LOAD_FLAGS_NONE,null,null,null);

Also, I recommend you take a look at the nsIWebNavigation interface.