from os import mkdir, makedirs, chdir, getcwd, listdir
from os.path import isfile, isdir, exists, dirname, realpath, getsize, walk
from re import match, escape, search, sub
from shutil import copytree
from HTMLParser import HTMLParser, HTMLParseError, piclose
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler 
from urllib import unquote
from time import sleep
from mimetypes import guess_type
import errno

# import PyCHM bindings
from chm import chmlib

# Config file selection: a local arch.conf in the current directory takes
# precedence over the system-wide one.
config = "arch.conf" if exists("arch.conf") else "/etc/archmage/arch.conf"

def listdir_r(dir):
    """Recursively list the content of directory *dir*.

    Returns a list with the '/'-separated path, relative to *dir*, of every
    file and directory found below it.  Directory names carry no trailing
    slash, and a directory is always listed before its own content.
    """
    # Imported locally: the module-level name ``walk`` is os.path.walk.
    import os

    res = []
    for dirpath, dirnames, filenames in os.walk(dir):
        # Strip the whole root prefix (not just its first component) so the
        # result stays relative for nested, absolute or './'-style roots.
        prefix = dirpath[len(dir):].replace(os.sep, '/').strip('/')
        if prefix:
            prefix += '/'
        for e in dirnames + filenames:
            res.append(prefix + e)
    return res


class CHMDir(object):
    """CHM content served from an already unpacked directory.

    The config file is executed straight into the instance namespace; the
    methods below read ``templates_dir``, ``auxes`` and ``fs_encoding`` from
    there (presumably defined by the config file -- confirm against
    arch.conf).
    """

    def __init__(self, name):
        # Name of source directory with CHM content
        self.sourcename = name
        # Import variables from config file into namespace
        execfile(config, self.__dict__)

        # Get all entries
        self.entries = self.get_entries(name)
        # Get template files
        self.templates = self.get_templates()
        # Get 'Table of Contents'; when several entries match, the last wins.
        # NOTE(review): self.hhc is only set when some entry ends with
        # '.hhc'; without one the lookup below raises AttributeError.
        for e in self.entries:
            if e.endswith('.hhc'):
                self.hhc = e
            if e.endswith('.hhk'):
                self.hhk = e
        hhclines = self.get_entry_by_name(self.hhc)
        self.contents, self.deftopic = SitemapFile(hhclines).parse()

    def get_entries(self, name):
        """Return every file and directory below *name* as '/'-rooted names.

        Directories are marked with a trailing '/'.
        """
        entries = []
        for fname in listdir_r(name):
            entry = '/' + fname
            if isdir(self.sourcename + entry):
                entry += '/'
            entries.append(entry)
        return entries

    def get_entry_by_name(self, name):
        """Return the content to serve for *name*.

        Lookup order: template files (exact name), icons of the template
        directory (case-insensitive), then CHM entries (case-insensitive).
        '/' is an alias for '/index.html'.

        Raises NameError when nothing matches.
        """
        if name == '/':
            name = '/index.html'
        if name in self.templates:
            return self.get_template_by_name(name)

        wanted = name.lower()
        icons_dir = self.templates_dir + '/icons/'
        if isdir(icons_dir):
            for icon in listdir(icons_dir):
                if wanted == '/icons/' + icon.lower():
                    # Open the on-disk spelling: the comparison ignores case
                    # but the file system may not.
                    with open(icons_dir + icon, 'rb') as icon_file:
                        return icon_file.read()

        for e in self.entries:
            if e.lower() == wanted:
                return CHMEntry(self, e).get()
        raise NameError("there is no " + name)

    def sub_mytag(self, re):
        """Return the replacement for one ``<%expr%>`` template tag.

        *re* is the match object.  Its first group is evaluated as an
        attribute expression on this instance and, if that fails, as a plain
        expression.
        """
        # NOTE(review): eval() executes whatever the template contains, so
        # template files must come from a trusted source.
        expr = re.group(1)
        try:
            return eval("self." + expr)
        except Exception:
            return eval(expr)

    def get_templates(self):
        """Return the '/'-prefixed names of the regular files in templates_dir."""
        # The explicit '/' matches how the other methods build template
        # paths and works with or without a trailing slash on templates_dir.
        return ['/' + fname for fname in listdir(self.templates_dir)
                if isfile(self.templates_dir + '/' + fname)]

    def get_template_by_name(self, name):
        """Return template *name* with every ``<%expr%>`` tag substituted."""
        with open(self.templates_dir + name) as template:
            text = template.read()
        return sub(r'\<%(.+?)%\>', self.sub_mytag, text)

    def process_templ(self):
        """Write the rendered templates and the icons into the current directory.

        The icons directory is copied only when 'icons/' does not exist yet.
        """
        for template in self.templates:
            rendered = self.get_template_by_name(template)
            with open(template[1:], 'w') as out:
                out.write(rendered)
        if not exists('icons/'):
            copytree(self.templates_dir + '/icons/', 'icons/')

    def raw_extract(self):
        """Write every entry into the current directory under lower-cased names.

        Entries whose name starts with one of ``self.auxes`` are skipped.
        """
        aux_re = '|'.join([escape(s) for s in self.auxes])
        for e in self.entries:
            # An empty pattern would match -- and skip -- every entry.
            if aux_re and match(aux_re, e):
                continue
            fname = e[1:].lower()
            if e[-1:] == '/':
                # The directory may already exist, e.g. as the parent of a
                # file that was extracted earlier.
                if not exists(fname):
                    makedirs(fname)
                continue
            dname = dirname(fname)
            if dname and not exists(dname):
                makedirs(dname)
            if self.fs_encoding:
                fname = fname.decode('utf-8').encode(self.fs_encoding)
            # Binary mode: entries include images and other non-text data.
            with open(fname, 'wb') as out:
                out.write(CHMEntry(self, e).get())

    def extract(self, dir):
        """Extract CHM content and rendered templates into new directory *dir*.

        The working directory is changed to *dir* and left there.  OS errors
        are reported on standard output instead of being raised.
        """
        try:
            mkdir(dir)
        except OSError as error:
            if error.errno == errno.EEXIST:
                print("Decompilation error: Directory '%s' already exists!" % dir)
            else:
                print("Decompilation error: %s" % error)
            return
        try:
            chdir(dir)
            self.raw_extract()
            self.process_templ()
        except OSError as error:
            print("Decompilation error: %s" % error)

class CHMFile(CHMDir):
    """CHM content read straight from a .chm file through chmlib."""

    def get_entries(self, name):
        """Open CHM file *name* and return its entry paths, minus the root '/'.

        The open handle is kept in ``self._handler`` for later reads.

        Raises IOError when the file cannot be opened.
        """
        self._handler = chmlib.chm_open(name)
        # NOTE(review): chm_open is expected to return None on failure (as
        # PyCHM's own wrapper assumes) -- confirm against the bindings.
        if self._handler is None:
            raise IOError("can't open CHM file '%s'" % name)
        return [path for path in self.get_names(self._handler)
                if path != '/']

    def get_names(self, chmfile):
        """Enumerate *chmfile* and return the path of every object in it.

        When chm_enumerate returns 0 an error marker is printed and
        whatever was collected so far is returned.
        """
        def _get_name(chmfile, ui, context):
            # Enumeration callback: collect the path and keep going.
            context.append(ui.path)
            return chmlib.CHM_ENUMERATOR_CONTINUE

        chmdir = []
        status = chmlib.chm_enumerate(chmfile, chmlib.CHM_ENUMERATE_ALL,
                                      _get_name, chmdir)
        if status == 0:
            print("   *** ERROR ***")
        return chmdir

    def __del__(self):
        # Close the CHM file handle when the object is destroyed.  The
        # attribute is missing if construction failed before the file was
        # opened, and None if opening failed.
        handler = getattr(self, '_handler', None)
        if handler is not None:
            chmlib.chm_close(handler)


class CHMEntry(object):
    """A single entry of a CHM file or of an unpacked CHM directory."""

    def __init__(self, parent, name):
        self.name = name
        self.parent = parent

    def read(self):
        """Return the raw content of the entry.

        For a CHMFile parent the object is fetched through chmlib and None
        is returned when it cannot be resolved or is empty.  For any other
        parent the file ``parent.sourcename + name`` is read.
        """
        if isinstance(self.parent, CHMFile):
            handler = self.parent._handler
            result, ui = chmlib.chm_resolve_object(handler, self.name)
            if result != chmlib.CHM_RESOLVE_SUCCESS:
                return None

            # long(0) is the Python 2 long zero: read from offset 0.
            size, text = chmlib.chm_retrieve_object(handler, ui, long(0), ui.length)
            if size == 0:
                return None
            return text

        # Binary mode: entries include images and other non-text data.
        with open(self.parent.sourcename + self.name, 'rb') as source:
            return source.read()

    def lower_links(self, text):
        """Return *text* with every ``href=...`` / ``src=...`` attribute lower-cased."""
        return sub(r'(?i)(href|src)\s*=\s*([^\s|>]+)',
                   lambda m: m.group(0).lower(), text)

    def add_restoreframing_js(self, name, text):
        """Return *text* with a "show framing" script inserted after ``<body>``.

        The script links back to index.html, reached through one '../' per
        '/' in *name* (runs of slashes count once), with *name* as the
        ``page`` parameter.  Only a bare ``<body>`` tag is matched.
        """
        name = sub('/+', '/', name)
        depth = name.count('/')

        js = """<body><script language="javascript">
        if ((window.name != "content") && (navigator.userAgent.indexOf("Opera") <= -1) )
        document.write("<center><a href='%sindex.html?page=%s'>show framing</a></center>")
        </script>""" % ('../' * depth, name)

        # A callable replacement inserts the snippet literally; a plain
        # string would have its backslashes treated as regex escapes.
        return sub(r'(?i)<\s*body\s*>', lambda m: js, text)

    def get(self):
        """Return the entry content, post-processed for HTML pages.

        An entry that cannot be read yields an empty string.  HTML pages
        get their links lower-cased when ``parent.filename_case`` is set
        and the framing script added when ``parent.restore_framing`` is set.
        """
        lines = self.read()
        # Bail out before the regex passes below, which cannot handle None.
        if lines is None:
            return str()
        if search(r'(?i)\.html?$', self.name):
            if self.parent.filename_case:
                lines = self.lower_links(lines)
            if self.parent.restore_framing:
                lines = self.add_restoreframing_js(self.name[1:], lines)
        return lines


class SitemapFile(object):
    """Holds the text of a sitemap file and parses it on demand."""

    def __init__(self, lines):
        self.lines = lines

    def parse(self):
        """Feed the text to a SitemapParser.

        Returns a tuple of the parser's accumulated output, terminated with
        a newline and a closing bracket, and the default topic it found.
        """
        parser = SitemapParser()
        parser.feed(self.lines)
        contents = parser.parsed + "\n]"
        return (contents, parser.deftopic)


class TagStack(list):
    """Stack of open tag names.

    Adapted from David Mertz's book "Text Processing in Python".
    """

    def append(self, tag):
        """Push *tag*; a paragraph-level tag first evicts those already open."""
        if tag.lower() in ('p', 'blockquote'):
            # Slice assignment changes this list in place; rebinding
            # ``self`` would only change a local name.
            self[:] = [t for t in self
                       if t.lower() not in ('p', 'blockquote')]
        super(TagStack, self).append(tag)

    def pop(self, tag):
        """Remove the nearest *tag* from the top, plus everything above it.

        Raises HTMLParseError when *tag* is not on the stack; the stack is
        left untouched in that case.
        """
        # Search from the top so the most recently opened tag is closed.
        for pos in range(len(self) - 1, -1, -1):
            if self[pos] == tag:
                del self[pos:]
                return
        raise HTMLParseError("Tag not on stack")
        

class SitemapParser(HTMLParser):
    """Parser for files in SiteMap format, such as .hhc.

    While being fed it accumulates in ``parsed`` a nested, bracketed list
    text with one ``name,local,"image"`` triple per <object>, and stores in
    ``deftopic`` the lower-cased 'Local' of the first object it finishes.
    """

    def __init__(self):
        self.tagstack = TagStack()
        self.params = {}
        self.parsed = ""
        self.deftopic = ""
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # first ul, start processing from here
        if tag == 'ul' and not self.tagstack:
            self.tagstack.append(tag)
            self.parsed += "\n["
            return
        # everything else only matters inside ul
        if not self.tagstack:
            return

        if tag == 'li':
            # End tags of <li> never unwind the stack, so a previous item
            # is closed here, when the next one starts.
            if self.tagstack[-1] != 'ul':
                self.parsed += "]"
                self.tagstack.pop('li')
            indent = " " * len(self.tagstack)
            if self.parsed != "\n[":
                self.parsed += ','
            self.parsed += "\n" + indent + "["
        if tag == 'param':
            attributes = dict(attrs)
            key = attributes.get('name')
            value = attributes.get('value')
            # Skip params lacking a name or a value instead of failing on
            # them here or later.
            if key is not None and value is not None:
                self.params[key] = value
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        # if inside ul
        if not self.tagstack:
            return
        if tag == 'ul':
            self.parsed += "]"
        if tag == 'object':
            params = self.params
            params.setdefault('ImageNumber', 1)
            params.setdefault('Local', '')
            params.setdefault('Name', '')
            if not self.deftopic:
                self.deftopic = params['Local'].lower()

            # Keep the emitted text on one line: real newlines in the name
            # become the two characters backslash + 'n'.
            name = params['Name'].replace("\r\n", "\\n").replace("\n", "\\n")
            local = params['Local'].lower()

            self.parsed += (self._quote(name) + "," + self._quote(local) +
                            "," + '"%s"' % params['ImageNumber'])
            self.params = {}
        if tag != 'li':
            self.tagstack.pop(tag)

    def _quote(self, value):
        """Return *value* wrapped in quotes that do not clash with its content."""
        if '"' in value:
            # Contains double quotes: wrap in single quotes, escaping any
            # single quote inside.
            return "'%s'" % value.replace("'", "\\'")
        return '"%s"' % value

    def _skip_broken_tag(self, i):
        """Return the end of the first ``piclose`` match at or after *i*, or -1."""
        found = piclose.search(self.rawdata, i)
        if found is None:
            return -1
        return found.end()

    def parse_starttag(self, i):
        # Tolerate malformed start tags by skipping past them.
        try:
            return HTMLParser.parse_starttag(self, i)
        except HTMLParseError:
            return self._skip_broken_tag(i)

    def parse_endtag(self, i):
        # Tolerate malformed or unbalanced end tags by skipping past them.
        try:
            return HTMLParser.parse_endtag(self, i)
        except HTMLParseError:
            return self._skip_broken_tag(i)


class CHMServer(HTTPServer):
    """HTTP server front-end for Compressed HTML content.

    The actual serving is delegated to a separate HTTPServer instance kept
    in ``httpd``; the base class initializer is not called.
    """

    def __init__(self, CHM, name='', port=8000):
        # Address the inner server is bound to.
        self.address = (name, port)
        server = HTTPServer(self.address, CHMRequestHandler)
        # Request handlers reach the CHM object through their server.
        server.CHM = CHM
        self.httpd = server

    def run(self):
        """Serve requests until interrupted."""
        self.httpd.serve_forever()


class CHMRequestHandler(BaseHTTPRequestHandler):
    """Handle HTTP requests for CHMServer."""

    def do_GET(self):
        """Answer a GET request with the CHM entry named by the request path.

        The query string is ignored.  Names the CHM object does not know
        are answered with a 404 error.
        """
        pagename = unquote(self.path.split('?')[0])
        if pagename == '/':
            mimetype = 'text/html'
        else:
            # guess_type() gives None for unknown extensions; fall back to
            # a generic binary type rather than sending "None".
            mimetype = guess_type(pagename)[0] or 'application/octet-stream'

        # Fetch the content before sending the status line, so a missing
        # entry is not reported as success.  get_entry_by_name signals an
        # unknown name with NameError.
        try:
            content = self.server.CHM.get_entry_by_name(pagename)
        except NameError:
            self.send_error(404, "File not found")
            return

        self.send_response(200)
        self.send_header("Content-type", mimetype)
        self.end_headers()

        # write the data obtained from the CHM instance into output
        self.wfile.write(content)
