import os
from ConfigParser import ConfigParser
from datetime import date
from httplib import HTTPConnection
from lxml.etree import Element, SubElement, fromstring, parse, tostring, XSLT
from shutil import rmtree
from StringIO import StringIO
from sys import argv
from urllib import urlopen
from xml.sax.saxutils import escape as xml_escape

# FIXME: read the namespace map from the config instead of hard-coding it.
# Prefix -> namespace URI table used by the XPath queries in this file.
nsMap = dict(
    ddi="http://www.icpsr.umich.edu/DDI",
    dii="urn:mpeg:mpeg21:2002:01-DII-NS",
    didl="urn:mpeg:mpeg21:2002:02-DIDL-NS",
    dip="urn:mpeg:mpeg21:2005:01-DIP-NS",
    foaf="http://xmlns.com/foaf/0.1/",
    oai='http://www.openarchives.org/OAI/2.0/',
    mods="http://www.loc.gov/mods/v3",
    nereus="http://www.nereus4economics.info/",
    rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    srw="http://www.loc.gov/zing/srw/",
    extra="http://meresco.com/namespace/fields/extra",
    meta="http://meresco.com/namespace/harvester/meta",
)

def getSectionDict(config, section=''):
    """Return all options of one config section as a plain dict.

    ``config`` must provide ``items(section)`` yielding ``(option, value)``
    pairs; the pairs are collected into a new dict and returned.
    """
    return dict(config.items(section))

class SitemapMaker(object):
    """Generate a sitemap index and one sitemap file per partner.

    Settings come from an INI-style config file: ``[main] docroot`` and
    ``server``, ``[partnersinfo] baseURL``, and ``[reader] baseURL`` and
    ``maximumRecords``.  All output is written to the scratch directory
    ``<docroot>/_tmp``, which is wiped and recreated by the constructor.
    """

    def __init__(self, configfile):
        """Load *configfile* and create an empty scratch directory."""
        self.config = ConfigParser()
        self.config.read(configfile)
        self._docroot = self.config.get('main', 'docroot')
        self._server = self.config.get('main', 'server')
        self._baseURL = self.config.get('partnersinfo', 'baseURL')
        self._maximumRecords = int(self.config.get('reader', 'maximumRecords'))
        self._tmp = os.path.join(self._docroot, "_tmp")
        # Leftovers of an earlier run must not end up in the new sitemap.
        rmtree(self._tmp, ignore_errors=True)
        os.mkdir(self._tmp)

    def make_sitemap_file(self, query):
        """Run *query* against the reader service and build a sitemap.

        Returns a ``(total, xml, diag)`` tuple:

        * ``total`` -- the record count reported by the service, ``0`` when
          nothing matched, ``-1`` when the request failed or the response
          carried no record count;
        * ``xml`` -- the sitemap document, or ``""`` when ``total <= 0``;
        * ``diag`` -- a diagnostic message for the log, ``""`` on success.

        Records lacking an identifier or a datestamp still raise
        ``IndexError``.
        """
        # FIXME: sorting of the result set is not implemented.
        stop = self._maximumRecords
        baseURL = self.config.get('reader', 'baseURL')
        url = baseURL + (
            "?version=1.1&operation=searchRetrieve&maximumRecords=%s"
            "&recordSchema=meta&x-recordSchema=header&query=%s" % (stop, query))
        try:
            response = urlopen(url)
            try:
                sruResponse = parse(response)
            finally:
                response.close()
        except IOError as e:
            # Do not unpack the exception into (errno, strerror): an IOError
            # built with a different number of arguments would make the
            # unpacking itself fail and hide the real error.
            reason = e.strerror if e.strerror is not None else e
            diag = "ERROR url=%s errno=%s strerror=%s" % (url, e.errno, reason)
            return -1, "", diag

        # The namespace map is passed by keyword; newer lxml releases accept
        # it only that way.
        counts = sruResponse.xpath("//srw:numberOfRecords/text()",
                                   namespaces=nsMap)
        if not counts:
            # No record count at all: report it like any other failed
            # request instead of crashing the whole run.
            return -1, "", "ERROR no numberOfRecords in response url=%s" % url
        total = int(counts[0])

        if total == 0:
            # No publications found.
            return 0, "", "WARNING no records found url=%s " % (url)

        parts = [
            '<?xml version="1.0" encoding="UTF-8"?>\n'
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
        ]
        for record in sruResponse.xpath("//srw:record", namespaces=nsMap):
            identifier = record.xpath(
                "srw:recordData/meta:meta/meta:upload/meta:id/text()",
                namespaces=nsMap)[0]
            datestamp = record.xpath(
                'srw:extraRecordData/recordData[@recordSchema="header"]/oai:header/oai:datestamp/text()',
                namespaces=nsMap)[0]
            # Values read from the parsed response are unescaped text; they
            # have to be escaped again before being embedded in XML.
            parts.append(
                ' <url>\n'
                '  <loc>http://www.economistsonline.org/publications/?id=%s</loc>\n'
                '  <lastmod>%s</lastmod>\n'
                '  <changefreq>yearly</changefreq>\n'
                ' </url>\n' % (xml_escape(identifier), xml_escape(datestamp)))
        parts.append('</urlset>')
        return total, "".join(parts), ""

    def make_index_file(self, partners):
        """Write ``sitemap_index.xml`` listing one sitemap per partner id.

        Every id in *partners* gets a ``<server>/sitemap/<id>.xml`` entry,
        whether or not a sitemap file is later written for that partner.
        """
        path = os.path.join(self._tmp, "sitemap_index.xml")
        parts = [
            '<?xml version="1.0" encoding="UTF-8"?>\n'
            '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
        ]
        server = xml_escape(self._server)
        for partner in partners:
            parts.append(
                ' <sitemap>\n'
                '  <loc>%s/sitemap/%s.xml</loc>\n'
                ' </sitemap>\n' % (server, xml_escape(partner)))
        parts.append("</sitemapindex>")
        with open(path, 'w') as out:
            out.write("".join(parts))

    def partners(self):
        """Return the ids of all partners whose status is not ``"new"``.

        The partner list is fetched from the ``listPartners`` request of the
        partners-info service.  A partner without a status entry raises
        ``IndexError``.
        """
        url = self._baseURL + "?request=listPartners"
        xml = parse(url)
        partners = []
        for p in xml.xpath('nereus:partner', namespaces=nsMap):
            status = p.xpath(
                'properties/entry[@key="neeo.partner.status"]/text()')[0]
            if status == "new":
                continue
            partners.append(p.xpath('@id')[0])
        return partners

    def make_sitemaps(self, partners):
        """Write ``<partner>.xml`` for each partner that has records.

        Query, record count and any diagnostic are appended to
        ``sitemap.log`` in the scratch directory.  Partners with a count of
        zero or a failed request get a log entry but no sitemap file.
        """
        # The with-blocks make sure the log and every sitemap file are
        # flushed and closed even when a partner fails halfway through.
        with open(os.path.join(self._tmp, "sitemap.log"), "w") as log:
            for partnerId in partners:
                query = partnerId + ' and fulltext.url.exists exact true not mods.genre="info:eu-repo/semantics/workingpaper"'
                log.write("query=" + query + "\n")
                total, result, diag = self.make_sitemap_file(query)
                log.write("total=" + str(total) + "\n")
                if diag:
                    log.write(diag + "\n")
                log.write("\n")
                if total > 0:
                    target = os.path.join(self._tmp, partnerId + ".xml")
                    with open(target, 'w') as out:
                        out.write(result)

def main():
    """Command-line entry point: regenerate ``<docroot>/sitemap``.

    Expects exactly one command-line argument, the path of the config file.
    The sitemaps are built in the maker's scratch directory, which then
    replaces the previous ``sitemap`` directory.

    Raises ``SystemExit`` with a usage message (non-zero exit status) when
    the argument count is wrong.
    """
    args = argv[1:]
    if len(args) != 1:
        # A usage error must not exit with status 0; SystemExit with a
        # message prints it to stderr and exits with status 1.
        raise SystemExit("supply one argument for the config file")
    configFile = args[0]
    maker = SitemapMaker(configFile)
    partners = maker.partners()
    print(partners)
    maker.make_index_file(partners)
    maker.make_sitemaps(partners)
    sitemap_dir = os.path.join(maker._docroot, "sitemap")
    # On the very first run there is no previous sitemap directory to
    # remove; an unconditional rmtree would fail after all the work is done.
    if os.path.isdir(sitemap_dir):
        rmtree(sitemap_dir)
    os.rename(maker._tmp, sitemap_dir)
    print("-- finished --")

# Run the generator only when executed as a script, not on import.
if __name__ == "__main__":
    main()
