JavaScript - - By James Edwards

Dealing with unqualified HREF values (Part 2)

In my original blog post, Dealing with unqualified HREF values, I put forward a method for converting an href value in any format into a fully-qualified URL, using data from the document location object.

However, as one commentator pointed out, the proposed solution couldn’t cater for changes in location context, such as that caused by a <base> element, or within the context of an included document such as a stylesheet or a page in an <iframe>.

To fix that, it was necessary to abandon the use of the location object entirely and parse URLs as strings. The upshot is a far more flexible and useful method, one that automatically caters for <base> elements and can also accept an input location string to use as context.

So from the original code, this:

//get the current document location object
//(this is the approach being replaced: it always describes the page's own URL,
//so it cannot reflect a <base> element or any other location context)
var loc = document.location;

Becomes this:

//get the current document location href
//(a plain string this time, so that it can be swapped for a different context below)
var here = document.location.href;

//look for a base element to use instead
//(only the first <base> in the document is considered)
var bases = document.getElementsByTagName('base');
if(bases.length > 0)
{
	var basehref = bases[0].getAttribute('href');
	if(basehref && basehref != '')
	{
		here = basehref;
	}
}

//if the context argument is present and non-empty string, use that instead
//("context" is the second argument of the qualifyHREF function this fragment belongs to;
//it takes priority over both the base element and the document location)
if(typeof context == 'string' && context != '')
{
	here = context;
}

//extract the protocol, host and path
//and create a location object with the data
//(a string pattern only replaces its first occurrence, so this collapses the "//" after
//the protocol and the split then yields [protocol, host, ...path segments];
//the protocol keeps its trailing colon and the host keeps any port number)
var parts = here.replace('//', '/').split('/');
var loc = {
	'protocol' : parts[0],
	'host' : parts[1]
	}
//remove the protocol and host so that only the path segments are left to join
parts.splice(0, 2);
loc.pathname = '/' + parts.join('/');

Here’s the complete revised function:

//qualify an HREF to form a complete URI
//
//href    - the href value to qualify: already-qualified (scheme://...), protocol-relative
//          (//host/...), root-relative (/...), up-reference (../...), here-relative (./...)
//          or plain relative
//context - optional fully-qualified URL string to resolve against; when it is absent or
//          empty, the first <base> element's href is used if there is one, otherwise the
//          document location
//returns - the qualified URI string
function qualifyHREF(href, context)
{
	//get the current document location href
	var here = document.location.href;

	//look for a base element to use instead
	var bases = document.getElementsByTagName('base');
	if(bases.length > 0)
	{
		var basehref = bases[0].getAttribute('href');
		if(basehref && basehref != '')
		{
			here = basehref;
		}
	}

	//if the context argument is present and non-empty string, use that instead
	if(typeof context == 'string' && context != '')
	{
		here = context;
	}

	//discard any query string or hash from the context, because a slash inside
	//either of them would otherwise be mistaken for a directory separator
	here = here.replace(/[?#][\s\S]*$/, '');

	//extract the protocol, host and path
	//and create a location object with the data
	//(a string pattern only replaces its first occurrence, so this just collapses
	//the "//" that follows the protocol)
	var parts = here.replace('//', '/').split('/');
	var loc = {
		'protocol' : parts[0],
		'host' : parts[1]
	};
	parts.splice(0, 2);
	loc.pathname = '/' + parts.join('/');

	//build a base URI from the protocol plus host (which includes port if applicable)
	var uri = loc.protocol + '//' + loc.host;

	//if the input path is relative-from-here
	//just delete the ./ token to make it relative
	if(/^(\.\/)([^\/]?)/.test(href))
	{
		href = href.replace(/^(\.\/)([^\/]?)/, '$2');
	}

	//if the input href is already qualified, copy it unchanged
	if(/^([a-z]+):\/\//.test(href))
	{
		uri = href;
	}

	//or if it begins with two slashes then it's protocol relative
	//so it only needs the context protocol in front of it
	else if(href.substr(0, 2) == '//')
	{
		uri = loc.protocol + href;
	}

	//or if the input href begins with a leading slash, then it's base relative
	//so just add the input href to the base URI
	else if(href.substr(0, 1) == '/')
	{
		uri += href;
	}

	//otherwise it's relative to the context directory, possibly via up-references
	else
	{
		//get the path parts and delete the last one (this page or directory)
		var dirs = loc.pathname.split('/');
		dirs.pop();

		//if it's an up-reference we need to climb the path
		var ups = href.match(/^((\.\.\/)+)([^\/].*$)/);
		if(ups)
		{
			//count only the leading up-references (three characters each),
			//so that a ../ further along the href is left in place untouched
			var references = ups[1].length / 3;

			//delete one part of the path for each of them, stopping at the root
			dirs = dirs.slice(0, Math.max(0, dirs.length - references));

			//and keep the last part of the path, minus up-references
			href = ups[3];
		}

		//now rebuild the path, ignoring empty parts
		var path = '';
		for(var i=0; i<dirs.length; i++)
		{
			if(dirs[i] != '')
			{
				path += '/' + dirs[i];
			}
		}
		path += '/';

		//then add the path and input href to the base URI
		uri += path + href;
	}

	//return the final uri
	return uri;
}

But wait … there’s more!

Having done that, I realised I was only a hop and a skip away from implementing a JavaScript equivalent of PHP’s parse_url function:

//parse a URL to form an object of properties
//
//url     - a fully-qualified URL string (scheme://host[:port]/path[?query][#hash])
//returns - an object with href, protocol, host, hostname, port, pathname, search and hash
//          properties; protocol keeps its trailing colon, search and hash keep their
//          leading "?" and "#", and any part that is missing is an empty string
function parseURL(url)
{
	//save the unmodified url to href property
	//so that the object we get back contains
	//all the same properties as the built-in location object
	var loc = { 'href' : url };

	//extract any hash first: everything from the first # onwards,
	//so that a second # inside the fragment is not lost
	var rest = url;
	var hashAt = rest.indexOf('#');
	loc.hash = '';
	if(hashAt != -1)
	{
		loc.hash = rest.substring(hashAt);
		rest = rest.substring(0, hashAt);
	}

	//then extract any search query: everything from the first ? onwards,
	//so that a second ? inside the query is not lost
	var searchAt = rest.indexOf('?');
	loc.search = '';
	if(searchAt != -1)
	{
		loc.search = rest.substring(searchAt);
		rest = rest.substring(0, searchAt);
	}

	//split what remains by single-slashes to get the component parts;
	//doing this after the query and hash are gone keeps them out of the host
	//when the URL has no path (eg. http://example.com?x=1)
	var parts = rest.replace('//', '/').split('/');

	//store the protocol and host (the host is empty if the string had no slash at all)
	loc.protocol = parts[0];
	loc.host = parts.length > 1 ? parts[1] : '';

	//extract any port number from the host
	//from which we derive the port and hostname
	var hostparts = loc.host.split(':');
	loc.hostname = hostparts[0];
	loc.port = hostparts.length > 1 ? hostparts[1] : '';

	//splice and join the remainder to get the pathname
	parts.splice(0, 2);
	loc.pathname = '/' + parts.join('/');

	//return the final object
	return loc;
}

So that’s two more for the toolkit!

Sponsors