# $Id: URL.pm 39280 2013-04-22 14:45:43Z wsl $
# $URL: https://svn.uvt.nl/its-id/trunk/sources/aselect-perl/lib/Aselect/URL.pm $

use utf8;
use strict;
use warnings FATAL => 'all';;

package Aselect::URL;

use Encode;
use Exporter qw(import);

our @EXPORT = qw(normalize_url valid_http_url acl_to_re_url $fqdn $fqdnport $hostport);

our $fqdn = qr{(?:xn--)?[a-z0-9]+(?:-[a-z0-9]+)*(?:\.(?:xn--)?[a-z0-9]+(?:-[a-z0-9]+)*)+}i;
our $port = qr{:(?:[1-9][0-9]{0,4}|\*)};
our $fqdnport = qr{$fqdn$port?};
our $hostport = qr{(?:\[[0-9a-f.:]+\]|$fqdn|localhost)$port?}i;

sub valid_http_url {
	return shift =~ m{^https?://$hostport?(?:[/?#]|$)}io;
}

sub normalize_url {
	my $url = shift;
	Encode::_utf8_off($url);
	$url =~ s{^([^:/?#.]+://$hostport(?:[/?#]|$))}{lc($1)}e;
	$url =~ s{([^!-~])}{sprintf('%%%02X', ord($1))}eg;
	return $url;
}

# transform a simplified URL specification into a
# strict regular expression
sub acl_to_re_url {
	my $orig = shift;

	return qr{$orig}
		if $orig =~ /^\^/;

	my $url = $orig;
	my $re = quotemeta($url);
	$url =~ s{^(\w+://)}{};

	my $scheme = $1
		? qr{$1}
		: qr{https?://};

	$url =~ s{^(\.($fqdnport)|$hostport)([/?#]|$)}{$3}
		or die "can't parse url '$orig'\n";
	my ($hostpart, $fqdnpart) = ($1, $2);
	my $domain;
	if($fqdnpart) {
		my $wildpart = ($fqdnpart =~ s/:\*$//) ? qr{(?::\d+)?} : '';
		$domain = qr{(?:(?:[a-z0-9.-]+)\.)?\Q$fqdnpart\E$wildpart}i
	} else {
		my $wildpart = ($hostpart =~ s/:\*$//) ? qr{(?::\d+)?} : '';
		$domain = qr{\Q$hostpart\E$wildpart}i;
	}

	die "in '$orig': unable to handle queries or fragments\n"
		if $url =~ m{[?#]};

	$url =~ s{//+}{/}g;
	$url =~ s{^/}{};
	my $path = $url eq ''
		? qr{(?:[/?#]|$)}
		: $url !~ m{/$}
			? qr{/\Q$url\E(?:[/?#]|$)}
			: qr{/\Q$url\E};

	return qr{^$scheme$domain$path};
}

1;
