package UvT::Email::Template;

# $Id: Template.pm 47785 2018-09-13 12:29:25Z wsl $
# $URL: https://svn.uvt.nl/its-id/trunk/sources/uvt-email-template/lib/UvT/Email/Template.pm $

# formatteer een XML-bericht tot een multipart/alternative MIME

use strict;
use warnings FATAL => 'all';
use utf8;

use Clarity -self;

use XML::LibXML;
use XML::LibXSLT;
use Email::MIME::Creator;
use MIME::Types;
use Image::Size ();
use Text::Tabs qw(expand);
use Scalar::Util qw(weaken reftype refaddr);

{
	# perl 5.12+ kan dit met each() maar zit niet in squeeze :\

	my %array_each;
	my %array_each_i;

	sub array_each(\@) {
		my $arr = shift;
		confess "Not an array or arrayref"
			unless reftype($arr) eq 'ARRAY';
		my $addr = refaddr($arr);
		unless(defined $array_each{$addr}) {
			while(my ($k, $v) = each(%array_each)) {
				unless(defined $v) {
					delete $array_each{$k};
					delete $array_each_i{$k};
				}
			}
			$array_each{$addr} = $arr;
			weaken($array_each{$addr});
			$array_each_i{$addr} = 0;
		}
		my $i = $array_each_i{$addr}++;
		return wantarray ? ($i, $arr->[$i]) : $i
			if $i < @$arr;
		delete $array_each{$addr};
		delete $array_each_i{$addr};
		return;
	}
}

our $columns = 76;

sub width {
	my $str = shift;
	# the order matters: the backspace code doesn't like combining
	# characters so they need to be removed. The cjk code doesn't know
	# how to deal with backspaces, and the tab code doesn't know how
	# to deal with any of the other special characters so they need to
	# be removed first.
	$str =~ s/(?:\p{Me}|\p{Mn})+//g;      # combining characters
	$str =~ s/(?:^|.)\010//gm;            # backspaces
	$str =~ s/(?:\p{Ea=W}|\p{Ea=F})/xx/g; # cjk characters
	$str = expand($str);                  # tabs
	return length($str);
}

sub wrap {
	my ($init, $cont, $text) = @_;
	my @lines = split(/(\n)/, $text);
	my $pfx = $init;
	my $res = '';
	while(@lines) {
		my $line = shift(@lines);
		my $eol = shift(@lines) // '';
		if($line =~ /[│─]/) {
			# don't try to wrap line art
			$res .= $pfx;
			$res .= $line;
			$res .= $eol;
			$pfx = $cont;
			next;
		}
		my @words = split(/((?:\s(?:\010.)?|(?:\S\010)\s)+)/, $line);
		my $buf = shift(@words) // '';
		while(@words) {
			if($buf eq '') {
				$buf .= shift(@words) // '';
				$buf .= shift(@words) // '';
			} else {
				my $space = shift(@words);
				last unless @words;
				my $next = shift(@words);
				if($self->width($pfx.$buf.$space.$next) > $columns) {
					$res .= $pfx;
					$res .= $buf;
					$res .= "\n";
					$pfx = $cont;
					$buf = $next;
				} else {
					$buf .= $space;
					$buf .= $next;
				}
			}
		}
		$res .= $pfx;
		$res .= $buf;
		$res .= $eol;
		$pfx = $cont;
	}
	return $res;
}

sub load_file {
	my $file = shift;
	open my $fh, '<:raw', $file
		or die "$file: $!\n";
	local $/;
	return scalar <$fh>;
}

field bullets => [qw(• · ‣ ⁃)];
our $bullet = 0;

sub format_strong {
	my $text = $self->nodes_to_text(shift->childNodes);
#	return "*$text*";
	$text =~ s/(\S)/$1\010$1/g;
	return $text;
}

sub format_em {
	my $text = $self->nodes_to_text(shift->childNodes);
#	return "/$text/";
	$text =~ s/([^_\n])/$1\010_/g;
	return $text;
}

sub format_ul {
	my $bullets = $self->bullets;
	my $b = $bullets->[$bullet];
	local $bullet = ($bullet + 1) % @$bullets;
	return $self->list_to_text("  $b ", @_);
}

sub format_ol {
	return $self->list_to_text("%2d) ", @_);
}

sub format_a {
	my $node = shift;
	my $text = $self->nodes_to_text($node->childNodes);
	my $link = $node->getAttribute('href')
		or die "'href' attribute missing on <a>\n";
	return "$text <$link>";
}

sub format_img {
	my $node = shift;
	my $alt = $node->getAttribute('alt')
		or die "'alt' attribute missing on <img>\n";
	return "[$alt]";
}

sub format_br {
	return "\n";
}

sub format_th {
	return $self->format_strong(@_);
}

sub format_td {
	return $self->nodes_to_text(shift->childNodes);
}

sub format_table {
	my $node = shift;
	die "table text must be inside th/td element\n"
		if $node->exists("text()[normalize-space()!='']|*/text()[normalize-space()!='']");
	my @r;
	foreach my $r ($node->childNodes) {
		next unless $r->nodeType == XML_ELEMENT_NODE;
		my $name = $r->nodeName // next;
		die "<table> element must only contain <tr> elements\n"
			unless $name eq 'tr';
		my @c;
		foreach my $c ($r->childNodes) {
			next unless $r->nodeType == XML_ELEMENT_NODE;
			my $name = $c->nodeName // next;
			die "<tr> element must only contain <th> or <td> elements\n"
				unless $name eq 'th' || $name eq 'td';
			push @c, $c;
		}
		push @r, \@c;
	}
	return $self->build_table(\@r);
}

sub list_to_text {
	my ($fmt, $node) = @_;
	my $width = length(sprintf($fmt, 0));
	my $indent = " "x$width;
	my $index = 0;
	my $text = '';
	foreach my $item ($node->findnodes('li')) {
		$index++;
		my @text = do {
			local $columns = $columns - $width;
			$self->nodes_to_text($item->childNodes)
		};
		$text .= "\n\n";
		$text .= $self->wrap(sprintf($fmt, $index), $indent, join("\n", map { "$_\n" } @text));
		$text .= "\n\n";
	}
	return $text;
}

sub nodes_to_text {
	my $text = '';
	foreach my $node (@_) {
		my $type = $node->nodeType;
		if($type == XML_ELEMENT_NODE) {
			my $name = $node->nodeName;
			my $h = "format_$name";
			die "unknown element type '$name' encountered\n"
				unless $self->can($h);
			$text .= $self->$h($node);
		} elsif($type == XML_TEXT_NODE || $type == XML_CDATA_SECTION_NODE) {
			my $contents = $node->nodeValue;
			$contents =~ s/\s+/ /g;
			$text .= $contents;
		} elsif($type == XML_ATTRIBUTE_NODE || $type == XML_COMMENT_NODE) {
			# ignore
		} else {
			die "unknown node type $type encountered\n";
		}
	}
	return $text;
}

sub nodes_to_par {
	my $text = $self->wrap('', '', $self->nodes_to_text(@_));
	$text =~ s/[ \t]+\n/\n/g;
	$text =~ s/\n\s*\n(?:\s*\n)+/\n\n/g;
	$text =~ s/\s+$//;
	$text =~ s/^(?:\s*\n)+//;
	return $text;
}

sub pad_text {
	my ($text, $width, $height) = @_;
	$text =~ s/\s+$//s;
	my $res = '';
	my $i = 0;
	my @lines = split(/\n/, $text);
	while(my ($i, $line) = array_each(@lines)) {
		my $len = $self->width($line);
		$res .= $line . (' 'x($width - $len)) . "\n";
	}
	$res .= ((' 'x$width) . "\n")x($height - @lines);
	return $res;
}

sub build_table {
	my $table = shift;
	my $minwidth = $columns / @{$table->[0]} - 3;
	my @colwidths;

	# formatteer alle cellen naar width en onthoud de breedte
	foreach my $row (@$table) {
		while(my ($i, $col) = array_each(@$row)) {
			my $text = $self->nodes_to_text($col);
			foreach my $line (split(/\n/, $text)) {
				my $len = $self->width($line);
				$colwidths[$i] = $len
					if ($colwidths[$i] // 0) < $len;
			}
		}
	}

	# haal alle kolommen die toch wel in minwidth passen uit de mix
	my $restwidth = 0;
	my $widecols = 0;
	foreach my $w (@colwidths) {
		if($w > $minwidth) {
			$widecols++;
		} else {
			$restwidth += $w + 3
		}
	}

	# verdeel de resterende breedte over de resterende kolommen
	foreach my $w (@colwidths) {
		if($w > $minwidth) {
			$w = ($columns - $restwidth) / $widecols - 3;
		}
	}

	my @realcolwidths;
	# formatteer alle cellen naar width en onthoud de breedte
	foreach my $row (@$table) {
		while(my ($i, $col) = array_each(@$row)) {
			local $columns = $colwidths[$i];
			my $text = $self->nodes_to_par($col);
			foreach my $line (split(/\n/, $text)) {
				my $len = $self->width($line);
				$realcolwidths[$i] = $len
					if ($realcolwidths[$i] // 0) < $len;
			}
		}
	}

	my $res = "\n\n┌";
	while(my ($i, $w) = array_each(@realcolwidths)) {
		$res .= '─'x($w+2);
		$res .= $i == $#realcolwidths ? '┐' : '┬';
	}
	$res .= "\n";

	# formatteer alle cellen naar de berekende width
	while(my ($j, $row) = array_each(@$table)) {
		my @cells;
		my $height = 0;
		while(my ($i, $col) = array_each(@$row)) {
			local $columns = $realcolwidths[$i];
			my $text = $self->nodes_to_par($col);
			$text =~ s/\s+$//s;
			push @cells, $text;
			my @h = split(/\n/, $text);
			$height = @h if $height < @h;
		}
		my $rowres = "│\n"x$height;
		while(my ($i, $cell) = array_each(@cells)) {
			my @a = split(/\n/, $self->pad_text($cell, $realcolwidths[$i], $height));
			$rowres =~ s/\n/" ".(shift @a)." │\n"/esg;
		}
		$res .= $rowres;
		$res .= $j == $#$table ? '└' : '├';
		while(my ($i, $w) = array_each(@realcolwidths)) {
			$res .= '─'x($w+2);
			$res .= $i == $#realcolwidths
				? $j == $#$table ? '┘' : '┤'
				: $j == $#$table ? '┴' : '┼';
		}
		$res .= "\n";

	}
	$res .= "\n";

	return $res;
}

sub par_to_text {
	return $self->nodes_to_par(shift->childNodes);
}

sub kop_to_text {
	my $text = shift;
	return $self->wrap('', '', "– $text –");
}

sub doc_to_text {
	my $doc = shift;
	my @pars;

	my $have_nederlands = $doc->exists('mailtje/nederlands');
	my $have_engels = $doc->exists('mailtje/engels');

	if($have_nederlands) {
		push @pars, "(English version below)" if $have_engels;
		push @pars,
			$self->kop_to_text($doc->findvalue('mailtje/nederlands/kop')),
			(map { $self->par_to_text($_) } $doc->findnodes('mailtje/nederlands/par|mailtje/allebei/par'));
	}
	if($have_engels) {
		push @pars, ('─'x$columns) if $have_nederlands;
		push @pars,
			$self->kop_to_text($doc->findvalue('mailtje/engels/kop')),
			(map { $self->par_to_text($_) } $doc->findnodes('mailtje/engels/par|mailtje/allebei/par'));
	}

	push @pars,
		"-- \nTilburg University <www.tilburguniversity.edu> – Warandelaan 2, 5037 AB Tilburg\n\nroutebeschrijving: <http://www.tilburguniversity.edu/nl/contact/>\nroute description: <http://www.tilburguniversity.edu/contact/>";

	my $text = join("\n", map { "$_\n" } @pars);
	utf8::encode($text);
	return $text;
}

field xml_parse_options => {
	expand_entities => 0,
	expand_xinclude => 0,
	load_ext_dtd => 0,
	no_network => 1,
	pedantic_parser => 1,
};

field xml => sub {
	my $self = shift;
	my $xml = new XML::LibXML;
	$xml->set_options($self->xml_parse_options);
	return $xml;
};

field xslt => sub { new XML::LibXSLT };

sub htmltemplate {
	return $self->{htmltemplate} unless @_;
	my $t = shift;
	$t = $self->xslt->parse_stylesheet_file($t)
		unless ref $t;
	return $self->{htmltemplate} = $t;
}

field imagepath => '.';

sub transformer {
	return $self->{transformer} unless @_;
	my $t = shift;
	$t = $self->xslt->parse_stylesheet_file($t)
		unless ref $t;
	return $self->{transformer} = $t;
}

field mimetypes => sub { new MIME::Types };

sub format {
	my $doc = shift;

	my $images = ref $_[0] ? shift : {};

	$doc = $self->xml->parse_file($doc)
		unless ref $doc;

	if(my $transformer = $self->transformer) {
		$doc = $transformer->transform($doc);
	}

	my $text = $self->doc_to_text($doc);

	my $html_xsl = $self->htmltemplate;
	confess "No HTML template configured" unless defined $html_xsl;

	my $html_dom = $html_xsl->transform($doc);

	my @attachments;

	my $xpc = new XML::LibXML::XPathContext;
	$xpc->registerNs(html => 'http://www.w3.org/1999/xhtml');

	my $imagepath = $self->imagepath;
	$imagepath = [$imagepath] unless ref $imagepath;

	my $mimetypes;

	my %attachedimages;

	foreach my $img ($xpc->findnodes('//html:img', $html_dom)) {
		my $src = $img->getAttribute('src')
			// die "img element has no 'src' attribute\n";
		next if $src =~ /^[a-z0-9_-]+:/i;

		my ($id, $width, $height);

		if(exists $attachedimages{$src}) {
			($id, $width, $height) = @{$attachedimages{$src}};
		} else {
			my $data;
			my $type;
			my $name = $src;

			if(exists $images->{$src}) {
				my $spec = $images->{$src};
				if(ref $spec) {
					$type = $spec->{type};
					$data = $spec->{data};
				}
			} else {
				die "refusing to open image file with funny name ($src)\n"
					unless $src =~ /^[a-z0-9]+(?:[_\/.-][a-z0-9]+)*$/;

				my $path;
				foreach my $p (@$imagepath) {
					next unless -f "$p/$src";
					$path = "$p/$src";
					last;
				}
				die "image '$src' not found in imagepath [@$imagepath]\n"
					unless defined $path;

				$name = $path;
				$data = $self->load_file($path);
			}

			($width, $height, my $imgtype) = Image::Size::imgsize(\$data);

			if(defined $type) {
				die "file '$name' ($type) is not an image\n"
					unless $type =~ m{^image/};
				if(defined $imgtype) {
					$mimetypes //= $self->mimetypes;
					my $mimetype = $mimetypes->mimeTypeOf($imgtype);
					unless(defined $mimetype && $mimetype->type eq $type) {
						# something is wonky, play safe
						undef $width;
						undef $height;
					}
				}
			} else {
				$mimetypes //= $self->mimetypes;
				my $mimetype = $mimetypes->mimeTypeOf($imgtype // $src);
				die "unable to determine mime type for '$name'\n"
					unless defined $mimetype;
				$type = $mimetype->type;
				die "file '$name' ($type) is not an image\n"
					unless $mimetype->mediaType eq 'image';
			}

			$id = @attachments + 1;
			push @attachments, Email::MIME->create(
				attributes => {
					content_type => $type,
					encoding => 'base64',
					disposition => 'inline',
					name => $src,
					filename => $src,
				},
				body => $data,
				header_str => ['Content-ID' => "<$id>"]
			);

			$attachedimages{$src} = [$id, $width, $height];
		}

		$img->setAttribute(src => "cid:$id");
		unless(defined $img->getAttribute('width') || defined $img->getAttribute('height')) {
			$img->setAttribute(width => $width) if defined $width;
			$img->setAttribute(height => $height) if defined $height;
		}
	}

	my $html = $html_dom->toStringHTML;

	# elements marked F in http://www.w3.org/TR/html4/index/elements.html:
	$html =~ s{<((area|base(?:font)?|br|col|frame|hr|img|input|isindex|link|meta|param)(?: [^"'=<>]+(?:=(?:"[^"]*"|'[^']*'))?)*)></\2>}{<$1 />}gi;

	my $text_part = Email::MIME->create(attributes => {content_type => 'text/plain', encoding => 'quoted-printable', charset => 'UTF-8'}, body => $text);
	my $html_part = Email::MIME->create(attributes => {content_type => 'text/html', encoding => 'quoted-printable'}, body => $html);

	my $alt = Email::MIME->create(attributes => {content_type => 'multipart/alternative'}, parts => [$text_part, $html_part]);
	my $msg = Email::MIME->create(attributes => {content_type => 'multipart/related'}, header_str => \@_, parts => [$alt, @attachments]);

	my $mime = $msg->as_string;
	$mime =~ s/[\r\t ]+\n/\n/sg;

	return $mime;
}

__END__

=pod

=encoding utf8

=head1 NAME

UvT::Email::Format - UvT style email formatting

=head1 SYNOPSIS

 use UvT::Email::Format;
 my $e = new UvT::Email::Format(htmltemplate => '/path/to/html.xsl');
 print $e->format('input.xml');

=head1 INTRODUCTION

This class can create ready to use e-mails formatted using XSL templates
(for HTML) and a built-in XML converter (for text). The resulting MIME
structure is a multipart/alternative structure containing both the HTML
and text versions.

Input is supplied as a custom bit of XML. See the INPUT section for
information about the required structure.

=head1 METHODS

=head2 new(...)

The C<new> class method creates a new UvT::Email::Format object. It uses
the standard Clarity syntax for its arguments. You can use it to supply the
required fields immediately.

=head2 htmltemplate

This required field should be set to either a filename or a completely
prepared XML::LibXSLT::Stylesheet object. This is the XSL stylesheet that
will be used to generate the HTML part of the e-mail.

=head2 xml_parse_options

This field contains the XML parser options that are used to parse any XML.
It's a hash that you can either replace or modify. See XML::LibXML::Parser
for details.

=head2 transformer

An optional stylesheet that will be applied to your XML document when you
call $e->format(). Can be either a filename or a completely prepared
XML::LibXSLT::Stylesheet object.

=head2 imagepath

This field holds an array of strings that is used as the search path for
images. The default value is C<['.']>.

=head2 format($doc, Header => "value", ...)

The workhorse of this class; it takes an XML filename or completely
prepared XML::LibXML::Document object as its first parameter. The return
value of this method is a string that forms a complete RFC2822 e-mail.

Additional parameters are interpreted as extra headers that will be added
to the resulting e-mail message (with appropriate encoding).

=head1 INPUT

The input for the format() method is an XML file with the following structure:

  <mailtje>
    <disclaimer/>

    <nederlands>
      <kop>text</kop>
      <par>html</par>
      <par>html</par>
      ...
    </nederlands>
    <engels>
      <kop>text</kop>
      <par>html</par>
      <par>html</par>
      ...
    </engels>

    <allebei>
      <par>html</par>
      <par>html</par>
      ...
    </allebei>

    <nederlands>
      <par>html</par>
      <par>html</par>
      ...
    </nederlands>
    <engels>
      <par>html</par>
      <par>html</par>
      ...
    </engels>
    ...
  </mailtje>

The root tag of this XML document must be "mailtje". The document must contain
at least one "nederlands" or "engels" element.

The "allebei" elements are optional. They describe a text part that will
appear in both language sections in the output. In the HTML output this
part will be centered and span both columns.

The first "nederlands" and the first "engels" elements must have exactly
one "kop" element as the first member. No other elements may contain a
"kop" element.

The empty <disclaimer/> tag is optional and indicates whether you want a
disclaimer about automated translations in the HTML version.

The allowed html elements and attributes are:

=over

=item strong

=item em

=item ul

=item ol

=item li

=item a

(the "href" attribute is required)

=item img

(the "src" and "alt" attributes are required)

=item br

=item table

=item tr

=item th

=item td

=back

Unless noted otherwise, no attributes are allowed.

=head1 AUTHOR

Wessel Dankers <wsl@uvt.nl>

=head1 COPYRIGHT

Copyright © 2013 Tilburg University.

Licensed under the GPLv3 license or (at your option) any later version.
