/*   **********************************************************************  **
 **   Copyright notice                                                       **
 **                                                                          **
 **   (c) 2003-2006 RSSOwl Development Team                                  **
 **   http://www.rssowl.org/                                                 **
 **                                                                          **
 **   All rights reserved                                                    **
 **                                                                          **
 **   This program and the accompanying materials are made available under   **
 **   the terms of the Eclipse Public License 1.0 which accompanies this     **
 **   distribution, and is available at:                                     **
 **   http://www.rssowl.org/legal/epl-v10.html                               **
 **                                                                          **
 **   A copy is found in the file epl-v10.html and important notices to the  **
 **   license from the team is found in the textfile LICENSE.txt distributed **
 **   in this package.                                                       **
 **                                                                          **
 **   This copyright notice MUST APPEAR in all copies of the file!           **
 **                                                                          **
 **   Contributors:                                                          **
 **     RSSOwl - initial API and implementation (bpasero@rssowl.org)         **
 **                                                                          **
 **  **********************************************************************  */

package net.sourceforge.rssowl.dao.feedparser;

import net.sourceforge.rssowl.controller.GUI;
import net.sourceforge.rssowl.dao.NewsfeedFactoryException;
import net.sourceforge.rssowl.model.Channel;
import net.sourceforge.rssowl.model.ChannelImage;
import net.sourceforge.rssowl.model.Enclosure;
import net.sourceforge.rssowl.model.NewsItem;
import net.sourceforge.rssowl.util.shop.RegExShop;
import net.sourceforge.rssowl.util.shop.StringShop;

import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Namespace;
import org.jdom.Text;
import org.jdom.output.XMLOutputter;

import java.io.IOException;
import java.io.StringWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.List;

/**
 * Parser for the Atom 1.0 Format <br />
 * Specification: http://www.atomenabled.org/developers/syndication/
 * 
 * @author <a href="mailto:bpasero@rssowl.org">Benjamin Pasero </a>
 * @version 1.2.3
 */
public class Atom_1_0_Parser extends AbstractFeedParser {

  /** Namespace: XHTML */
  private static final Namespace xhtml = Namespace.getNamespace("http://www.w3.org/1999/xhtml");

  /**
   * Instantiate a new Parser for this format.
   * 
   * @param document The document containing the data to parse
   * @param rssChannel The Channel to fill with data from the document
   * @param url The URL of the Newsfeed that is parsed
   * @param nameSpaces Possible NameSpaces of the XML document
   * @throws NewsfeedFactoryException If any error occurs
   */
  public Atom_1_0_Parser(Document document, Channel rssChannel, String url, Namespace nameSpaces[]) throws NewsfeedFactoryException {
    super(document, rssChannel, url, nameSpaces);
  }

  /**
   * Resolve the given String against the given Base URI. This is a best-effort
   * operation: whenever resolving is not possible, the given String is
   * returned unchanged.
   * 
   * @param base The Base URI to use for resolving, may be NULL.
   * @param rel The reference to resolve against the Base URI, may be NULL or
   * empty.
   * @return The resolved URI as String, or <code>rel</code> unchanged if no
   * Base URI was given, <code>rel</code> is not set or <code>rel</code> is not
   * a legal URI reference.
   */
  private String resolve(URI base, String rel) {

    /** Base URI or Rel String is not given */
    if (base == null || !StringShop.isset(rel))
      return rel;

    /** Base URI and Rel String is given */
    try {
      return base.resolve(rel).toString();
    }

    /** Rel String is not a legal URI, hand it back untouched */
    catch (IllegalArgumentException e) {
      return rel;
    }
  }

  /**
   * Create a URI from the given String without ever throwing.
   * 
   * @param str The String to create the URI from, may be NULL or empty.
   * @return The URI, or NULL if the String is not set or violates the URI
   * syntax.
   */
  private URI toUriOrNull(String str) {
    if (!StringShop.isset(str))
      return null;

    try {
      return new URI(str);
    }

    /** A malformed Base URI is treated as if none was given at all */
    catch (URISyntaxException e) {
      return null;
    }
  }

  /**
   * Get a text-value from the atom document. The text can either be of the type
   * "text", "html" or "xhtml". Only "xhtml" requires special treatment: the
   * wrapping DIV (or BODY) element is serialized to a String including its
   * markup. All other types, a missing type and unknown types are read as
   * plain child value.
   * 
   * @param element The element's name
   * @param parent The parent element
   * @return String the value or null
   */
  protected String getChildText(String element, Element parent) {
    if (parent == null)
      return null;

    Element child = getChildElement(parent, element);
    if (child == null)
      return getChildValue(element, parent);

    /** Determine the Type */
    String type = getAttributeValue(child, "type");

    /** Type: Plain Text, Entity escaped HTML, unknown or not given */
    if (!"xhtml".equals(type))
      return getChildValue(element, parent);

    /** Type: Inline xhtml, wrapped in a div element */
    Element xmlDiv = getChildElement(child, "div", xhtml);

    /** Feed may use the BODY tag instead of DIV */
    if (xmlDiv == null)
      xmlDiv = getChildElement(child, "body", xhtml);

    /** No wrapper element provided, fall back to default */
    if (xmlDiv == null)
      return getChildValue(element, parent);

    /** Get text into a String */
    XMLOutputter out = new XMLOutputter();
    StringWriter writer = new StringWriter();
    try {
      out.output(xmlDiv, writer);
      writer.close();
    } catch (IOException e) {

      /** Log and continue with whatever was written so far */
      GUI.logger.log("getChildText()", e);
    }

    /** Get Text */
    String content = writer.toString();

    /**
     * Problem: This Method of writing the content of the xmlDiv into the
     * StringWriter is not taking care of any CDATA-Constructs inside, as no
     * XML-parsing is done. The workaround is to manually remove the
     * CDATA-Tags. This has the same effect as parsing, since CDATA is to be
     * taken as is, with no element or entity processing.
     */
    if (content.indexOf("<![CDATA[") >= 0) {
      content = StringShop.replaceAll(content, "<![CDATA[", "");
      content = StringShop.replaceAll(content, "]]>", "");
    }

    /** Return text */
    return content;
  }

  /**
   * Fill the Channel with the feed-level data of the document (language,
   * title, homepage, description, date, author, generator, logo, copyright)
   * and add one NewsItem per entry element.
   * 
   * @see net.sourceforge.rssowl.dao.feedparser.AbstractFeedParser#parse()
   */
  protected void parse() {

    /** Temp String */
    String str;

    /** Language */
    str = getAttributeValue(root, "lang", Namespace.XML_NAMESPACE);
    if (StringShop.isset(str))
      rssChannel.setLanguage(str);

    /** Feed XML Base */
    URI feedBaseUri = toUriOrNull(getAttributeValue(root, "base", Namespace.XML_NAMESPACE));

    /** Title of feed */
    str = getChildText("title", root);
    if (StringShop.isset(str))
      rssChannel.setTitle(Text.normalizeString(StringShop.stripTags(str)));

    /** Link(s) of feed, may provide the Base URI if not yet known */
    feedBaseUri = parseAtomFeedLinks(feedBaseUri);

    /** Description of feed */
    str = getChildText("subtitle", root);
    if (StringShop.isset(str))
      rssChannel.setDescription(Text.normalizeString(StringShop.stripTags(str)));

    /** Last update of feed */
    str = getChildValue("updated", root);
    if (StringShop.isset(str))
      rssChannel.setPubDate(str);

    /** Author of feed */
    Element author = getChildElement(root, "author");
    if (author != null) {
      str = getChildValue("name", author);
      if (StringShop.isset(str))
        rssChannel.setCreator(str);
    }

    /** Generator of feed */
    str = getChildValue("generator", root);
    if (StringShop.isset(str))
      rssChannel.setGenerator(str);

    /**
     * Logo: Resolve against the Base URI first and validate afterwards (same
     * order as used for the ID of an entry), so that a relative reference is
     * not rejected before it had the chance to become absolute.
     */
    str = resolve(feedBaseUri, getChildValue("logo", root));
    if (StringShop.isset(str) && RegExShop.isValidURL(str)) {
      ChannelImage image = new ChannelImage();
      image.setImgUrl(str);
      image.setLink(rssChannel.getHomepage());
      rssChannel.setImage(image);
    }

    /** Copyright of feed */
    str = getChildValue("rights", root);
    if (StringShop.isset(str))
      rssChannel.setCopyright(str);

    /** Entries of the feed (guarded against NULL like the link lists) */
    List entries = getChildren(root, "entry");
    if (entries == null)
      return;

    Iterator entriesIt = entries.iterator();
    while (entriesIt.hasNext())
      parseAtomEntry((Element) entriesIt.next(), feedBaseUri);
  }

  /**
   * Parse the link elements of the feed. Sets the homepage of the Channel and
   * determines the Base URI of the feed from a "self" link in case no Base URI
   * is known yet.
   * 
   * @param feedBaseUri The Base URI of the feed as known so far, may be NULL.
   * @return The Base URI of the feed after looking at the links, may be NULL.
   */
  private URI parseAtomFeedLinks(URI feedBaseUri) {
    List links = getChildren(root, "link");
    if (links == null)
      return feedBaseUri;

    /** Remember if a link with a set href was already used as homepage */
    boolean homepageFound = false;

    for (int a = 0; a < links.size(); a++) {
      Element link = (Element) links.get(a);

      /** Check the relation attribute */
      String rel = getAttributeValue(link, "rel");
      String href = getAttributeValue(link, "href");

      boolean isAlternate = (rel == null || rel.equals("alternate"));
      boolean isLastLink = (a == (links.size() - 1));

      /**
       * Set as homepage if rel equals "alternate". The last link is only used
       * as fallback if no homepage was found so far, otherwise a trailing
       * "self" link would overwrite the homepage with the URL of the feed.
       */
      if (isAlternate || (isLastLink && !homepageFound)) {
        rssChannel.setHomepage(resolve(feedBaseUri, href));
        if (StringShop.isset(href))
          homepageFound = true;
      }

      /** Set as base URI if rel equals "self" */
      else if ("self".equals(rel) && feedBaseUri == null)
        feedBaseUri = toUriOrNull(href);
    }

    return feedBaseUri;
  }

  /**
   * Parse a single entry element into a NewsItem and insert it into the
   * Channel.
   * 
   * @param entry The entry element to parse.
   * @param feedBaseUri The Base URI of the feed, may be NULL.
   */
  private void parseAtomEntry(Element entry, URI feedBaseUri) {

    /** Temp String */
    String str;

    NewsItem rssNewsItem = new NewsItem();

    /** News XML Base */
    URI newsBaseUri = toUriOrNull(getAttributeValue(entry, "base", Namespace.XML_NAMESPACE));

    /** Resolve Feed and News Base URI */
    if (feedBaseUri != null && newsBaseUri != null)
      newsBaseUri = feedBaseUri.resolve(newsBaseUri);

    /** Take the Feed Base URI as News Base URI */
    else if (feedBaseUri != null)
      newsBaseUri = feedBaseUri;

    /** Base URI */
    if (newsBaseUri != null)
      rssNewsItem.setBaseUri(newsBaseUri.toString());

    /** Title */
    str = getChildText("title", entry);
    if (StringShop.isset(str))
      rssNewsItem.setTitle(str);

    /** Link(s) of entry */
    List newsLinks = getChildren(entry, "link");
    for (int a = 0; newsLinks != null && a < newsLinks.size(); a++) {
      Element newsLink = (Element) newsLinks.get(a);

      /** Check the relation attribute */
      String rel = getAttributeValue(newsLink, "rel");

      /** News Link */
      if (rel == null || rel.equals("alternate"))
        rssNewsItem.setLink(resolve(newsBaseUri, getAttributeValue(newsLink, "href")));

      /** Enclosure */
      else if ("enclosure".equals(rel)) {
        String type = getAttributeValue(newsLink, "type");
        String length = getAttributeValue(newsLink, "length");
        String url = resolve(newsBaseUri, getAttributeValue(newsLink, "href"));

        Enclosure enclosure = new Enclosure();
        enclosure.setType(type);
        enclosure.setLength(length);
        enclosure.setUrl(url);

        rssNewsItem.insertEnclosure(enclosure);
      }
    }

    /** Take Id as Link if Link is still NULL */
    if (rssNewsItem.getLink() == null) {
      str = resolve(newsBaseUri, getChildValue("id", entry));
      if (StringShop.isset(str) && RegExShop.isValidURL(str))
        rssNewsItem.setLink(str);
    }

    /** Content */
    str = getChildText("content", entry);
    if (StringShop.isset(str))
      rssNewsItem.setDescription(Text.normalizeString(str));

    /** Content was not available, use Summary then */
    if (!StringShop.isset(rssNewsItem.getDescription())) {

      /** Summary */
      str = getChildText("summary", entry);
      if (StringShop.isset(str))
        rssNewsItem.setDescription(Text.normalizeString(str));
    }

    /** Set part of description as title if title is not available */
    if (!StringShop.isset(rssNewsItem.getTitle()) && rssNewsItem.getDescription() != null)
      rssNewsItem.setTitle(rssNewsItem.getDescription(), true);

    /** Updated */
    str = getChildValue("updated", entry);
    if (StringShop.isset(str)) {
      rssNewsItem.setPubDate(str, true);
      rssChannel.addAvailableNewsItemInfo("TABLE_HEADER_PUBDATE");
    }

    /** PubDate: Only used if "updated" did not yield a date */
    if (rssNewsItem.getPubDate() == null) {
      str = getChildValue("published", entry);
      if (StringShop.isset(str)) {
        rssNewsItem.setPubDate(str, true);
        rssChannel.addAvailableNewsItemInfo("TABLE_HEADER_PUBDATE");
      }
    }

    /** Author */
    Element newsauthor = getChildElement(entry, "author");
    if (newsauthor != null) {
      str = getChildValue("name", newsauthor);
      if (StringShop.isset(str)) {
        rssNewsItem.setAuthor(str);
        rssChannel.addAvailableNewsItemInfo("TABLE_HEADER_AUTHOR");
      }
    }

    /** Category */
    Element category = getChildElement(entry, "category");
    if (category != null) {

      /** Try to parse the Label */
      str = getAttributeValue(category, "label");

      /** Parse the Term then */
      if (!StringShop.isset(str))
        str = getAttributeValue(category, "term");

      if (StringShop.isset(str)) {
        rssNewsItem.setCategory(str);
        rssChannel.addAvailableNewsItemInfo("TABLE_HEADER_CATEGORY");
      }
    }

    /** Source */
    Element source = getChildElement(entry, "source");
    if (source != null) {
      str = getChildValue("id", source);
      if (StringShop.isset(str))
        rssNewsItem.setSource(str);
    }

    rssChannel.insertItem(rssNewsItem);
  }
}