/*   **********************************************************************  **
 **   Copyright notice                                                       **
 **                                                                          **
 **   (c) 2003-2006 RSSOwl Development Team                                  **
 **   http://www.rssowl.org/                                                 **
 **                                                                          **
 **   All rights reserved                                                    **
 **                                                                          **
 **   This program and the accompanying materials are made available under   **
 **   the terms of the Eclipse Public License 1.0 which accompanies this     **
 **   distribution, and is available at:                                     **
 **   http://www.rssowl.org/legal/epl-v10.html                               **
 **                                                                          **
 **   A copy is found in the file epl-v10.html and important notices to the  **
 **   license from the team is found in the textfile LICENSE.txt distributed **
 **   in this package.                                                       **
 **                                                                          **
 **   This copyright notice MUST APPEAR in all copies of the file!           **
 **                                                                          **
 **   Contributors:                                                          **
 **     RSSOwl - initial API and implementation (bpasero@rssowl.org)         **
 **                                                                          **
 **  **********************************************************************  */

package net.sourceforge.rssowl.util.shop;

import net.sourceforge.rssowl.controller.GUI;
import net.sourceforge.rssowl.dao.ConnectionManager;
import net.sourceforge.rssowl.util.GlobalSettings;

import org.apache.xerces.util.EncodingMap;
import org.jdom.Document;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Factory class for XML concerns in RSSOwl
 * 
 * @author <a href="mailto:bpasero@rssowl.org">Benjamin Pasero </a>
 * @version 1.2.3
 */
public class XMLShop {

  /** Doctype for the generated HTML */
  public static final String DOCTYPE_HTML_TRANSITIONAL = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">";

  /** A DTD URI to use for a non existing DTD */
  public static final String NULL_DTD = "http://www.null.dtd";

  /** Feature in Xerces to allow Java Encoding Names */
  public static final String ALLOW_JAVA_ENCODING = "http://apache.org/xml/features/allow-java-encodings";

  /**
   * Charset used while sniffing the XML declaration. The declaration itself
   * is plain ASCII, so a fixed single-byte charset makes the sniffing
   * independent of the platform default encoding.
   */
  private static final String SNIFF_CHARSET = "ISO-8859-1";

  /** Maximum number of characters inspected while sniffing the encoding */
  private static final int SNIFF_LIMIT = 1024;

  /**
   * Mark limit for online feeds. It is chosen larger than SNIFF_LIMIT because
   * the decoding reader may read ahead of the characters actually consumed.
   */
  private static final int MARK_LIMIT = 16384;

  /** Some HTML tags to strip (doubles) */
  private static final String[] stripDoubleTags = new String[] { "b", "i", "u", "em", "strong", "li", "ul", "ol", "p", "img", "blockquote", "font", "span", "div", "center", "area", "map", "form" };

  /** Some HTML tags to strip (singles) */
  private static final String[] stripSingleTags = new String[] { "br", "p" };

  /** Matches opening and closing variants of all tags to strip, any case */
  private static final Pattern STRIP_TAGS_PATTERN = Pattern.compile(buildStripTagsRegex(), Pattern.CASE_INSENSITIVE);

  /** Matches HTML comments, also those spanning lines or containing "-" */
  private static final Pattern COMMENT_PATTERN = Pattern.compile("<!--.*?-->", Pattern.DOTALL);

  /** Extracts the value of the encoding attribute of a XML declaration */
  private static final Pattern ENCODING_PATTERN = Pattern.compile("<\\?.*?encoding\\s*=\\s*[\"']([^\"']*)[\"'].*\\?>");

  /** Matches the first complete title element of a line */
  private static final Pattern TITLE_PATTERN = Pattern.compile("<title[^>]*>.*?</title>");

  /** This utility class constructor is hidden */
  private XMLShop() {
  // Protect default constructor
  }

  /**
   * Try to grab the title from the given feed. The feed is read from the
   * local file system in case the given url points to an existing file and
   * is loaded using a connection otherwise.
   * 
   * @param url Url or local path to the RSS or RDF feed
   * @param useProxy Flag indicating whether a proxy should be used to lookup
   * the title
   * @return The title from the feed or an empty String
   * @throws IOException if an error occurs
   */
  public static String getTitleFromFeed(String url, boolean useProxy) throws IOException {
    File localFile = new File(url);

    /** If Feed is stored locally */
    if (localFile.exists())
      return getTitleFromLocalFeed(localFile);

    /** Feed is online */
    return getTitleFromOnlineFeed(url, useProxy);
  }

  /**
   * Method to get the contents of XML as InputStream
   * 
   * @param url Url / Path to the XML
   * @return InputStream of the xml or NULL in case connecting to the given
   * url failed
   */
  public static InputStream getXMLStream(String url) {
    File localFile = new File(url);

    /** XML is located locally */
    if (localFile.exists()) {
      try {
        return new FileInputStream(localFile);
      } catch (FileNotFoundException e) {

        /** Log and fall through to the connection based lookup */
        GUI.logger.log("getXMLStream()", e);
      }
    }

    /** XML is located in the internet */
    ConnectionManager connectionManager = new ConnectionManager(url);
    try {
      connectionManager.connect();
      return connectionManager.getInputStream();
    }

    /** In case of an IOException close the connection */
    catch (IOException e) {
      connectionManager.closeConnection();
      return null;
    }
  }

  /**
   * Check if the given Encoding is known to Apache Xerces XML Parser
   * 
   * @param encoding The encoding name as String
   * @return boolean TRUE in case the encoding is supported
   */
  public static boolean isEncodingSupported(String encoding) {

    /** Return on error */
    if (!StringShop.isset(encoding))
      return false;

    /**
     * Check for present IANA encoding name. The uppercase conversion uses a
     * fixed Locale, since the default Locale may change ASCII letters (for
     * example the Turkish dotted "i").
     */
    if (EncodingMap.getIANA2JavaMapping(encoding.toUpperCase(Locale.ENGLISH)) != null)
      return true;

    /** Check for present Java encoding name */
    return EncodingMap.getJava2IANAMapping(encoding) != null;
  }

  /**
   * Check if the given document is a valid "user.xml", that is if its root
   * element is named "rssowl".
   * 
   * @param document The document to check for valid user xml
   * @return TRUE if document is valid
   */
  public static boolean isValidUserXML(Document document) {

    /** A document without root element is not valid */
    if (document == null || !document.hasRootElement())
      return false;

    return "rssowl".equals(document.getRootElement().getName());
  }

  /**
   * Check if the file at the given inputstream is a valid "user.xml". The
   * given stream is not closed by this method.
   * 
   * @param inputStream InputStream holding the contents of the XML
   * @return TRUE if file is valid
   */
  public static boolean isValidUserXML(InputStream inputStream) {
    if (inputStream == null)
      return false;

    SAXBuilder builder = new SAXBuilder("org.apache.xerces.parsers.SAXParser");
    XMLShop.setDefaultEntityResolver(builder);

    try {
      return isValidUserXML(builder.build(inputStream));
    }

    /** In any case of an exception return FALSE */
    catch (IOException e) {
      return false;
    } catch (JDOMException e) {
      return false;
    } catch (IllegalArgumentException e) {
      return false;
    }
  }

  /**
   * Check if the file at the given path is a valid "user.xml"
   * 
   * @param path Path to the import file
   * @return TRUE if file is valid
   */
  public static boolean isValidUserXML(String path) {
    if (path == null)
      return false;

    FileInputStream fileIns = null;
    try {
      fileIns = new FileInputStream(path);
      return isValidUserXML(fileIns);
    } catch (FileNotFoundException e) {
      return false;
    }

    /** The stream was opened here, so it has to be closed here as well */
    finally {
      closeQuietly(fileIns);
    }
  }

  /**
   * Ignore the DTD of the XML and set the default DTD "entities.dtd" that
   * provides common entities taken from the HTML 4.01 Spec (W3C). The
   * resolver answers any request with that bundled resource, regardless of
   * the public or system id asked for.
   * 
   * @param builder The builder to set the default Entityresolver
   */
  public static void setDefaultEntityResolver(SAXBuilder builder) {
    EntityResolver bundledDtdResolver = new EntityResolver() {

      /**
       * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String,
       * java.lang.String)
       */
      public InputSource resolveEntity(String publicId, String systemId) {
        InputStream dtdStream = GUI.class.getResourceAsStream("/usr/entities.dtd");
        return new InputSource(dtdStream);
      }
    };
    builder.setEntityResolver(bundledDtdResolver);
  }

  /**
   * Strip some HTML tags and all HTML comments from the given text. The tags
   * are removed, their contents are kept. Tag names are matched as whole
   * names and regardless of case, so "&lt;b&gt;" and "&lt;BR/&gt;" are
   * stripped while "&lt;body&gt;" or "&lt;pre&gt;" remain untouched.
   * 
   * @param text The text to strip the HTML tags from
   * @return String HTML stripped text, or the given text if it is NULL or
   * does not contain any tag
   */
  public static String stripSimpleHTMLTags(String text) {

    /** Nothing to do if there is no tag at all */
    if (text == null || text.indexOf('<') < 0)
      return text;

    /** Strip common HTML tags (singles and doubles) */
    String stripped = STRIP_TAGS_PATTERN.matcher(text).replaceAll("");

    /** Strip Comments */
    return COMMENT_PATTERN.matcher(stripped).replaceAll("");
  }

  /**
   * Write the document as XML file to the given file. Any IOException is
   * logged and not propagated to the caller.
   * 
   * @param document The XML Document
   * @param file The file where to write the XML contents
   */
  public static void writeXML(Document document, File file) {
    XMLOutputter outp = new XMLOutputter(getXMLFormat());

    FileOutputStream fOs = null;
    try {
      fOs = new FileOutputStream(file);
      outp.output(document, fOs);
    } catch (IOException e) {
      GUI.logger.log("writeXML()", e);
    } finally {

      /** A failing close may indicate lost data, so it is logged */
      if (fOs != null) {
        try {
          fOs.close();
        } catch (IOException e) {
          GUI.logger.log("writeXML()", e);
        }
      }
    }
  }

  /**
   * Write the document as XML file to the given path
   * 
   * @param document The XML Document
   * @param filePath The file where to write the XML contents
   */
  public static void writeXML(Document document, String filePath) {
    writeXML(document, filePath, false);
  }

  /**
   * Write the document as XML file to the given path
   * 
   * @param document The XML Document
   * @param filePath The file where to write the XML contents
   * @param deleteOnExit If TRUE, delete the XML on exit
   */
  public static void writeXML(Document document, String filePath, boolean deleteOnExit) {
    File newFile = new File(filePath);
    if (deleteOnExit)
      newFile.deleteOnExit();
    writeXML(document, newFile);
  }

  /**
   * Build the regular expression that matches the opening and the closing
   * variant (including attributes and self-closing notation) of all tags
   * listed in the single- and double-tag arrays.
   * 
   * @return String The regular expression
   */
  private static String buildStripTagsRegex() {
    StringBuffer regEx = new StringBuffer("</?(?:");
    for (int a = 0; a < stripSingleTags.length; a++)
      regEx.append(stripSingleTags[a]).append('|');

    for (int a = 0; a < stripDoubleTags.length; a++) {
      if (a > 0)
        regEx.append('|');
      regEx.append(stripDoubleTags[a]);
    }

    /** The word boundary avoids "b" matching "br" or "body" */
    regEx.append(")\\b[^>]*>");
    return regEx.toString();
  }

  /**
   * Close the given stream and ignore any error while doing so. Only used
   * for streams that have been read from.
   * 
   * @param stream The stream to close, may be NULL
   */
  private static void closeQuietly(InputStream stream) {
    if (stream == null)
      return;

    try {
      stream.close();
    } catch (IOException e) {
      /** Fail Silently: nothing was written, so no data can be lost */
    }
  }

  /**
   * Create a reader on the given stream that uses the given encoding if it
   * is set and supported and the default encoding otherwise.
   * 
   * @param inS The stream to read from
   * @param encoding The encoding to use or NULL if unknown
   * @return BufferedReader A reader decoding the stream
   */
  private static BufferedReader createReader(InputStream inS, String encoding) {

    /** Use supplied Encoding */
    if (StringShop.isset(encoding) && isEncodingSupported(encoding)) {
      try {
        return new BufferedReader(new InputStreamReader(inS, encoding));
      } catch (UnsupportedEncodingException e) {
        /** Known to Xerces but not to the VM: Use Default Encoding */
      }
    }

    /** Use Default Encoding */
    return new BufferedReader(new InputStreamReader(inS));
  }

  /**
   * This Method will read the first line of the given Reader and try to read
   * out the Encoding. A very typical first line of most XML Documents is for
   * example: <?xml encoding="UTF-8"?>
   * <p>
   * Reading stops at the first closing tag, the first line break, the end of
   * the stream or after SNIFF_LIMIT characters, whichever comes first.
   * 
   * @param inputReader The Reader that should have a XML as content.
   * @return String Either the found Encoding or NULL.
   * @throws IOException In case of an Error.
   */
  private static String getEncodingFromXML(InputStreamReader inputReader) throws IOException {
    StringBuffer strBuf = new StringBuffer();

    /**
     * The result of read() has to be kept as int: casting it to char first
     * turns the EOF marker -1 into a regular character and the loop would
     * never terminate on short input.
     */
    int c;
    while (strBuf.length() < SNIFF_LIMIT && (c = inputReader.read()) != -1) {

      /** End of Line reached */
      if (c == '\n' || c == '\r')
        break;

      strBuf.append((char) c);

      /** Closing Tag is the last one to append */
      if (c == '>')
        break;
    }

    /** Save the first Line */
    String firstLine = strBuf.toString();

    /** No Encoding supplied */
    if (firstLine.indexOf("encoding") < 0)
      return null;

    /** Extract the Encoding Value (stops at the closing quote) */
    Matcher match = ENCODING_PATTERN.matcher(firstLine);
    return match.find() ? match.group(1) : null;
  }

  /**
   * Get the title from a Reader. The first line that contains both the
   * opening and the closing title tag is used.
   * 
   * @param inputReader A BufferedReader for the input
   * @throws IOException if an error occurs
   * @return Title of the file if found, an empty String otherwise
   */
  private static String getTitleFromFeed(BufferedReader inputReader) throws IOException {
    String line;

    /** Read the file until the Title is found or EOF (NULL) is reached */
    while ((line = inputReader.readLine()) != null) {

      /** Line does not contain a complete title */
      if (line.indexOf("<title") < 0 || line.indexOf("</title>") < 0)
        continue;

      /** Extract the title element, fall back to the entire line */
      String title = line.trim();
      Matcher match = TITLE_PATTERN.matcher(title);
      if (match.find())
        title = match.group();

      /** Decode possible XML special chars */
      return parseString(title);
    }

    /** No title was found */
    return "";
  }

  /**
   * Grab the title from a feed stored in the local file system. The file is
   * read twice: once to find out about the encoding and once to read the
   * title using that encoding.
   * 
   * @param file The existing file containing the feed
   * @return The title from the feed or an empty String
   * @throws IOException if an error occurs
   */
  private static String getTitleFromLocalFeed(File file) throws IOException {
    String encoding;

    /** Try to read Encoding out of XML Document */
    FileInputStream fileIns = new FileInputStream(file);
    try {
      encoding = normalizeEncoding(getEncodingFromXML(new InputStreamReader(fileIns, SNIFF_CHARSET)));
    } finally {
      closeQuietly(fileIns);
    }

    /** Grab Title from a fresh stream, closed even in case of an error */
    fileIns = new FileInputStream(file);
    try {
      return getTitleFromFeed(createReader(fileIns, encoding));
    } finally {
      closeQuietly(fileIns);
    }
  }

  /**
   * Grab the title from a feed that is loaded using a connection. The
   * connection is closed in any case, also if an error occurs.
   * 
   * @param url Url to the RSS or RDF feed
   * @param useProxy Flag indicating whether a proxy should be used
   * @return The title from the feed or an empty String
   * @throws IOException if an error occurs
   */
  private static String getTitleFromOnlineFeed(String url, boolean useProxy) throws IOException {

    /** Create a new Connection Manager and connect */
    ConnectionManager connectionManager = new ConnectionManager(url);
    try {
      connectionManager.connect(useProxy);

      /** Buffered Stream to support mark and reset */
      BufferedInputStream bufIns = new BufferedInputStream(connectionManager.getInputStream());
      bufIns.mark(MARK_LIMIT);

      /** Try to read Encoding out of XML Document */
      String encoding = normalizeEncoding(getEncodingFromXML(new InputStreamReader(bufIns, SNIFF_CHARSET)));

      /** Reset the Stream to its beginning */
      bufIns.reset();

      /** Grab Title using supplied or Default Encoding */
      return getTitleFromFeed(createReader(bufIns, encoding));
    }

    /** Close connection */
    finally {
      connectionManager.closeConnection();
    }
  }

  /**
   * Create a Format to use with the XMLOutputter
   * 
   * @return Format The format to use with XMLOutputter
   */
  private static Format getXMLFormat() {

    /** Create Format for XML output */
    Format outputFormat = Format.getPrettyFormat();

    /** Set encoding of the XML */
    String encoding = GlobalSettings.charEncoding;
    try {
      if (Charset.isSupported(encoding) && isEncodingSupported(encoding))
        outputFormat.setEncoding(encoding);
    }

    /** Thrown for a missing or illegal name: Keep encoding of the Format */
    catch (IllegalArgumentException e) {
      GUI.logger.log("getXMLFormat()", e);
    }

    return outputFormat;
  }

  /**
   * Avoid the lowercase (or mixed case) notation of UTF-8.
   * 
   * @param encoding The encoding name, may be NULL
   * @return "UTF-8" if the given name equals it ignoring the case, the given
   * value otherwise
   */
  private static String normalizeEncoding(String encoding) {
    return "utf-8".equalsIgnoreCase(encoding) ? "UTF-8" : encoding;
  }

  /**
   * Parse a String as XML to have possible entities resolved.
   * 
   * @param str The String to parse
   * @return The normalized text of the parsed root element, or the given
   * String with its title tags removed in case it could not be parsed
   */
  private static String parseString(String str) {

    /** Init SAXBuilder */
    SAXBuilder builder = new SAXBuilder("org.apache.xerces.parsers.SAXParser");
    setDefaultEntityResolver(builder);

    /**
     * We have to add a DTD ref here to use RSSOwl's custom DTD set in the
     * "setDefaultEntityResolver(builder)". It is not relevant, that the DTD is
     * not existing that is referenced here. The Doctype is overriden by RSSOwl
     * anyways. The given String is kept untouched for the fallback below.
     */
    String xml = "<?xml version=\"1.0\"?><!DOCTYPE Meta SYSTEM \"" + NULL_DTD + "\">" + str;

    /** Parse the document */
    try {
      Document document = builder.build(new StringReader(xml));

      /** Get the parsed title */
      return document.getRootElement().getTextNormalize();
    }

    /** Flag exception */
    catch (IOException e) {
      /** Return str as Title */
    }

    /** Flag exception */
    catch (JDOMException e) {
      /** Return str as Title */
    }

    /** Flag exception */
    catch (IllegalArgumentException e) {
      /** Return str as Title */
    }

    /** Remove the title tags (also delete attributes in title tag) */
    String fallback = str.replaceAll("<title[^>]*>", "");
    return fallback.replaceAll("</title>", "");
  }
}