import os
from ConfigParser import ConfigParser
from datetime import date
from httplib import HTTPConnection
from lxml.etree import Element, SubElement, fromstring, parse, tostring, XSLT
from shutil import rmtree
from StringIO import StringIO
from sys import argv
from urllib import urlopen
from xml.sax.saxutils import escape as xml_escape

# FIXME: use the nsMap from the config
# Prefix -> namespace URI map handed to the XPath queries in ArchiveMaker.
nsMap = {
    'ddi'   :"http://www.icpsr.umich.edu/DDI",
    'dii'   :"urn:mpeg:mpeg21:2002:01-DII-NS",
    'didl'  :"urn:mpeg:mpeg21:2002:02-DIDL-NS",
    'dip'   :"urn:mpeg:mpeg21:2005:01-DIP-NS",
    'foaf'  :"http://xmlns.com/foaf/0.1/",
    'oai'   :'http://www.openarchives.org/OAI/2.0/',
    'mods'  :"http://www.loc.gov/mods/v3",
    'nereus':"http://www.nereus4economics.info/",
    'rdf'   :"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    'srw'   :"http://www.loc.gov/zing/srw/",
    'extra' :"http://meresco.com/namespace/fields/extra",
    'meta'  :"http://meresco.com/namespace/harvester/meta",
}

# FIXME: this should be retrieved from the admin files
# Maps a repository identifier to a set name (empty string when none is given).
# NOTE(review): nothing in the visible part of this file reads setMap -- confirm
# whether it is still needed.
setMap = {
    'cerge-cuni-cz'         :"",
    'columbia-university-us':"",
    'dauphine-fr'           :"",
    'eui-eu'                :"",
    'ifw-kiel-de'           :"",
    'kuleuven-be'           :"",
    'lse-ac-uk'             :"", # fulltext
    'ox-ac-uk'              :"",
    'repub-eur-nl'          :"",
    'tilburguniversity-nl'  :"",
    'uc3m-es'               :"",
    'ucd-ie'                :"",
    'ucl-ac-uk'             :"", # fulltext
    'uclouvain-be'          :"",
    'ulb-ac-be'             :"",
    'unimaas-nl'            :"withfulltext",
    'univ-tlse1-fr'         :"",
    'Warwick-uk'            :"",
    'www-sciences-po-fr'    :""
}

def getSectionDict(config,  section = ''):
    """Return every option of one config section as a plain dict.

    config  -- object exposing items(section), yielding (option, value) pairs
    section -- name of the section to read
    """
    return dict(config.items(section))

class ArchiveMaker(object):
    """Build a RePEc (ReDIF) archive directory from records served over SRU.

    All output is written to a temporary directory, <docroot>/<archive id>_tmp,
    which is wiped and recreated when the object is constructed.
    """

    # ReDIF series template; one filled-in copy is written per partner.
    # The space after the handle and the blank line that follows are part of
    # the emitted format.
    _SERIES_TEMPLATE = (
        "Template-type: ReDIF-Series 1.0\n"
        "Name: Open Access publications from %(organisation)s\n"
        "Provider-Name: %(organisation)s\n"
        "Provider-Homepage: %(homepage)s\n"
        "Maintainer-Name: %(name)s\n"
        "Maintainer-Email: %(email)s\n"
        "Handle: RePEc:ner:%(seriesid)s \n\n"
    )

    def __init__(self, configfile):
        """Read the configuration, load the stylesheets and reset the tmp dir.

        configfile -- path of an ini file providing the sections main, archive,
                      partnersinfo, reader, series and excludeSet (the writer
                      section is read later, when a handle is written back)
        """
        self.config = ConfigParser()
        self.config.read(configfile)
        self._xsltDir = self.config.get('main', 'xsltDir')
        self._xslt = self._load_xslt("didlmods2redif.xsl")
        self._addHandle = self._load_xslt("addRePEcHandle.xsl")
        self._docroot = self.config.get('archive', 'docroot')
        self._archive = self.config.get('archive', 'id')
        self._baseURL = self.config.get('partnersinfo', 'baseURL')
        self._maximumRecords = int(self.config.get('reader', 'maximumRecords'))
        self._seriesMap = getSectionDict(self.config, 'series')
        self._excludeSet = getSectionDict(self.config, 'excludeSet')
        self._tmp = os.path.join(self._docroot, self._archive + "_tmp")
        # Always start from an empty temporary directory.
        rmtree(self._tmp, ignore_errors=True)
        os.mkdir(self._tmp)

    def _load_xslt(self, name):
        """Compile the stylesheet `name` from the xslt directory, closing the file."""
        with open(os.path.join(self._xsltDir, name)) as stylesheet:
            return XSLT(parse(stylesheet))

    def make_metadata_file(self, query, seriesId):
        """Run `query` against the SRU reader and convert the hits to ReDIF.

        query    -- query string appended to the SRU request URL
        seriesId -- series identifier used in the handle RePEc:ner:<seriesId>:<docid>

        Returns a tuple (total, redif, diag):
          total -- -1 when the request failed with an IOError, otherwise the
                   numberOfRecords value reported by the SRU response
          redif -- the ReDIF text (prefixed with a UTF-8 byte order mark), or ""
                   when nothing was retrieved
          diag  -- diagnostic text for the log; "" when there is nothing to report
        """
        stop = self._maximumRecords
        # FIXME: sorting is not implemented yet
        baseURL = self.config.get('reader', 'baseURL')
        url = baseURL + \
                "?version=1.1&operation=searchRetrieve&maximumRecords=%s&x-recordSchema=meta&query=%s" % (stop, query)
        diag = ""
        try:
            response = urlopen(url)
            try:
                sruResponse = parse(response)
            finally:
                response.close()
        except IOError as error:
            # Read the attributes instead of unpacking the exception, so an
            # IOError that does not carry exactly two args is still reported.
            errno = getattr(error, 'errno', None)
            strerror = getattr(error, 'strerror', None)
            if strerror is None:
                strerror = str(error)
            diag += "ERROR url=%s errno=%s strerror=%s" % (url, errno, strerror)
            return -1, "", diag
        total = int(sruResponse.xpath("//srw:numberOfRecords/text()", namespaces=nsMap)[0])

        if total == 0:
            ## no publications found ##
            diag += "WARNING no records found url=%s " % (url)
            return 0, "", diag

        chunks = ["\xEF\xBB\xBF"]  # UTF-8 byte order mark
        for record in sruResponse.xpath("//srw:record", namespaces=nsMap):
            didlMods = record.xpath("srw:recordData/oai:metadata/didl:DIDL", namespaces=nsMap)[0]
            identifiers = didlMods.xpath(
                "didl:Item/didl:Descriptor/didl:Statement/dii:Identifier/text()", namespaces=nsMap)
            if not identifiers:
                # A record without an identifier cannot get a handle; skip it.
                continue
            docid = identifiers[0].split("#", 1)[0]
            handle = "RePEc:ner:" + seriesId + ":" + docid
            # XSLT parameters are XPath expressions, so a string must be quoted.
            qhandle = "'" + handle + "'"
            chunks.append(str(self._xslt(didlMods, handle=qhandle)))
            chunks.append("\n\n")
            meta = record.xpath(
                'srw:extraRecordData/recordData[@recordSchema="meta"]/meta:meta', namespaces=nsMap)[0]
            diag += self._store_handle(meta, handle, qhandle)
        return total, "".join(chunks), diag

    def _store_handle(self, meta, handle, qhandle):
        """Write `handle` back to the writer unless the meta record already has it.

        Returns the diagnostic text to append to the log ("" on success or when
        no update was needed).
        """
        metaHandle = meta.xpath('meta:record/meta:repec/meta:handle/text()', namespaces=nsMap)
        if metaHandle and metaHandle[0] == handle:
            return ""
        qtoday = "'" + str(date.today()) + "'"
        newMeta = tostring(self._addHandle(meta, handle=qhandle, today=qtoday))
        repository = meta.xpath("meta:repository/meta:id/text()", namespaces=nsMap)[0]
        recordIdentifier = meta.xpath("meta:record/meta:id/text()", namespaces=nsMap)[0]
        document = Document(newMeta, '').get(method="Element")
        recordUpdate = UpdateRequest("replace", repository, recordIdentifier, document)
        port = self.config.get('writer', 'port')
        self._status, detail = recordUpdate.send(port)
        if self._status == 'fail':
            return " failed to write handle " + detail
        return ""

    def make_archive_file(self):
        """Write the ReDIF archive template to <tmp>/<archive id>arch.rdf."""
        lines = [
            "Template-type: ReDIF-Archive 1.0",
            "Handle: RePEc:" + self._archive,
            "Name: " + self.config.get('archive', 'name'),
            "Maintainer-Email: " + self.config.get('archive', 'email'),
            "URL: " + self.config.get('archive', 'url'),
        ]
        path = os.path.join(self._tmp, self._archive + "arch.rdf")
        with open(path, 'w') as output:
            # No newline after the last line.
            output.write("\n".join(lines))

    def make_series_file(self, partners):
        """Write one ReDIF series template per partner to <tmp>/<archive id>seri.rdf.

        partners -- dict mapping a series id to a dict with the keys
                    'Provider-Name', 'Provider-Homepage', 'Maintainer-Name'
                    and 'Maintainer-Email' (as returned by partners())
        """
        entries = []
        for key in partners:
            info = partners[key]
            entries.append(self._SERIES_TEMPLATE % {
                'organisation': info['Provider-Name'],
                'homepage': info['Provider-Homepage'],
                'name': info['Maintainer-Name'],
                'email': info['Maintainer-Email'],
                'seriesid': key,
            })
        path = os.path.join(self._tmp, self._archive + "seri.rdf")
        with open(path, 'w') as output:
            output.write("".join(entries))

    def partners(self):
        """Fetch the partner information for every configured series.

        Returns a dict mapping the series id to a dict with the keys
        'Provider-Name', 'Provider-Homepage', 'Maintainer-Name' and
        'Maintainer-Email'.  Partners whose status is "new" are left out.
        """
        institutionPath = '/nereus:partner/rdf:RDF/foaf:Organization[rdf:type="info:eu-repo/semantics/institution"]'
        partners = {}
        for key in self._seriesMap:
            partnerId = self._seriesMap[key]
            url = self._baseURL + "?request=getPartner&partnerId=" + partnerId
            partnerInfo = parse(url)
            status = partnerInfo.xpath('properties/entry[@key="neeo.partner.status"]/text()')[0]
            if status == "new":
                continue
            partners[key] = {
                'Provider-Name': partnerInfo.xpath(
                    institutionPath + "/foaf:name/text()", namespaces=nsMap)[0],
                'Provider-Homepage': partnerInfo.xpath(
                    institutionPath + "/foaf:homepage/@rdf:resource", namespaces=nsMap)[0],
                'Maintainer-Name': partnerInfo.xpath(
                    institutionPath + "/nereus:contacts/nereus:contact/@name", namespaces=nsMap)[0],
                'Maintainer-Email': partnerInfo.xpath(
                    institutionPath + "/nereus:contacts/nereus:contact/@e-mail", namespaces=nsMap)[0],
            }
        return partners

    def make_metadata(self):
        """Write <tmp>/<series>/metadata.rdf for every series that has records.

        The query, the record total and any diagnostics of each series are
        appended to <tmp>/repec.log.
        """
        with open(os.path.join(self._tmp, "repec.log"), "w") as log:
            for series in self._seriesMap:
                partnerId = self._seriesMap[series]
                path = os.path.join(self._tmp, series)
                if not os.path.exists(path):
                    os.mkdir(path)
                query = partnerId + ' and fulltext.url.exists exact true not mods.genre="info:eu-repo/semantics/workingpaper"'
                if series in self._excludeSet:
                    query += ' not set=' + self._excludeSet[series]
                log.write("query=" + query + "\n")
                total, result, diag = self.make_metadata_file(query, series)
                log.write("total=" + str(total) + "\n")
                if diag:
                    log.write(diag + "\n")
                log.write("\n")
                if total > 0:
                    with open(os.path.join(path, "metadata.rdf"), 'w') as output:
                        output.write(result)

# copied from enrichmentserver FIXME: class in separate package; same for the other classes that follow
class Tree(object):
    """Base class for objects that wrap an XML element in `self.root`."""

    def __init__(self):
        self.root = None

    def get(self, method="xml", xml_declaration=False):
        """Return the wrapped tree in the requested form.

        method          -- "xml" for the UTF-8 serialisation of the root,
                           "Element" for the root element itself
        xml_declaration -- passed on to the serialiser when method is "xml"

        Any other method value yields None.
        """
        if method == "xml":
            return tostring(self.root, xml_declaration=xml_declaration, encoding="utf-8")
        if method == "Element":
            return self.root
        return None

class Document(Tree):
    """A harvester <document> element holding a meta part and an optional record part."""

    def __init__(self, meta, record, part=""):
        """Build the document element.

        meta   -- text stored in the part named "meta"
        record -- text stored in a second part; no second part is added when
                  this is empty
        part   -- value of the name attribute of the second part
        """
        self.root = Element('document', xmlns="http://meresco.com/namespace/harvester/document")
        metaPart = SubElement(self.root, 'part', name="meta")
        metaPart.text = meta
        if record:
            recordPart = SubElement(self.root, 'part', name=part)
            recordPart.text = record

# FIXME: namespaces are now defined in two places (see nsMap above)
# Namespace URIs for the update request, plus the prefix map used to query
# the update response.
diag_ns = 'http://www.loc.gov/zing/srw/diagnostic/'
meta_ns = 'http://meresco.com/namespace/harvester/meta'
mods_ns = 'http://www.loc.gov/mods/v3'
srw_ns  = 'http://www.loc.gov/zing/srw/'
ucp_ns  = 'info:lc/xmlns/update-v1'
ns = {'ddi':'http://www.icpsr.umich.edu/DDI',
      'diag': diag_ns,
      'didl': 'urn:mpeg:mpeg21:2002:02-DIDL-NS',
      'dii' : 'urn:mpeg:mpeg21:2002:01-DII-NS',
      'meta': meta_ns,
      'mods': mods_ns,
      'srw' : srw_ns,
      'ucp' : ucp_ns}

class UpdateRequest(Tree):
    """An SRU record update request that can be POSTed to the local writer."""

    # Action keyword -> action URI placed in the ucp:action element.
    _ACTION_URIS = {
        "replace": "info:srw/action/1/replace",
        "delete": "info:srw/action/1/delete",
    }

    def __init__(self, action, repository, recordIdentifier, document):
        """Build the updateRequest element tree.

        action           -- "replace" or "delete"; for any other value the
                            ucp:action element is left empty
        repository       -- repository id, first half of the record identifier
        recordIdentifier -- record id, second half of the record identifier
        document         -- element appended as record data (only used for
                            "replace")
        """
        self.ns = {"xmlns:srw": srw_ns, "xmlns:ucp": ucp_ns}
        self.root = Element('srw:updateRequest', self.ns)
        SubElement(self.root, 'srw:version').text = "1.0"
        self.action = SubElement(self.root, 'ucp:action')
        actionUri = self._ACTION_URIS.get(action)
        if actionUri is not None:
            self.action.text = actionUri
        SubElement(self.root, 'ucp:recordIdentifier').text = repository + ":" + recordIdentifier
        if action == "replace":
            self.record = SubElement(self.root, 'srw:record')
            SubElement(self.record, 'srw:recordPacking').text = 'xml'
            SubElement(self.record, 'srw:recordSchema').text  = 'document'
            SubElement(self.record, 'srw:recordData').append(document)

    def send(self, port):
        """POST the request to /update on localhost:`port`.

        Returns ("success", "") when the response reports success, otherwise
        ("fail", <reason>).  The serialised request is kept in self.request.
        """
        # FIXME: catch HTTPExceptions
        self.request = self.get(xml_declaration=True)
        ## writer that the enrichment is sent to ##
        connection = HTTPConnection('localhost', port)
        try:
            connection.putrequest("POST", "/update")
            connection.putheader("Content-Type", "text/xml; charset=\"utf-8\"")
            connection.putheader("Content-Length", str(len(self.request)))
            connection.endheaders()
            connection.send(self.request)

            response = connection.getresponse()
            status = response.status
            body = response.read()
        finally:
            # Release the socket whatever happened above.
            connection.close()

        # Check the status before parsing: an error page need not be XML.
        if status != 200:
            return "fail", "HTTP problem: %s" %status

        content = fromstring(body)
        operationStatus = content.xpath("//ucp:operationStatus/text()", namespaces=ns)
        if operationStatus and operationStatus[0] == "success":
            return "success", ""
        details = content.xpath("//diag:diagnostic/diag:details/text()", namespaces=ns)
        if details:
            return "fail", details[0]
        return "fail", "no diagnostic details in response"

def main():
    """Build the archive named by the config file given on the command line.

    The archive is assembled in the temporary directory and only moved over
    the published archive directory once every step has finished.  Exits with
    status 1 when the number of arguments is not exactly one.
    """
    args = argv[1:]
    if len(args) != 1:
        print("supply one argument for the config file")
        raise SystemExit(1)
    archive = ArchiveMaker(args[0])
    archive.make_archive_file()
    partners = archive.partners()
    archive.make_series_file(partners)
    archive.make_metadata()
    archive_dir = os.path.join(archive._docroot, archive._archive)
    # On a first run there is no previous archive to remove.
    if os.path.exists(archive_dir):
        rmtree(archive_dir)
    os.rename(archive._tmp, archive_dir)
    print("-- finished --")


if __name__ == '__main__':
    main()
