/*   **********************************************************************  **
 **   Copyright notice                                                       **
 **                                                                          **
 **   (c) 2003-2006 RSSOwl Development Team                                  **
 **   http://www.rssowl.org/                                                 **
 **                                                                          **
 **   All rights reserved                                                    **
 **                                                                          **
 **   This program and the accompanying materials are made available under   **
 **   the terms of the Eclipse Public License 1.0 which accompanies this     **
 **   distribution, and is available at:                                     **
 **   http://www.rssowl.org/legal/epl-v10.html                               **
 **                                                                          **
 **   A copy is found in the file epl-v10.html and important notices to the  **
 **   license from the team is found in the textfile LICENSE.txt distributed **
 **   in this package.                                                       **
 **                                                                          **
 **   This copyright notice MUST APPEAR in all copies of the file!           **
 **                                                                          **
 **   Contributors:                                                          **
 **     RSSOwl - initial API and implementation (bpasero@rssowl.org)         **
 **                                                                          **
 **  **********************************************************************  */

package net.sourceforge.rssowl.controller.thread;

import net.sourceforge.rssowl.controller.GUI;
import net.sourceforge.rssowl.controller.dialog.FeedDiscoveryDialog;
import net.sourceforge.rssowl.dao.ConnectionManager;
import net.sourceforge.rssowl.dao.NewsfeedFactoryException;
import net.sourceforge.rssowl.dao.feedparser.FeedParser;
import net.sourceforge.rssowl.model.Category;
import net.sourceforge.rssowl.model.Channel;
import net.sourceforge.rssowl.util.shop.RegExShop;
import net.sourceforge.rssowl.util.shop.StringShop;
import net.sourceforge.rssowl.util.shop.URLShop;
import net.sourceforge.rssowl.util.shop.XMLShop;

import org.jdom.Document;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UTFDataFormatException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Iterator;
import java.util.Random;
import java.util.Vector;

/**
 * The FeedDiscoveryManager performs a search for RSS / RDF feeds on a certain
 * website. Each link that is in the result is checked for a valid feed. If the
 * link is a newsfeed, a new line is added to the table of the
 * FeedDiscoveryDialog.
 * <p>
 * Threading model: one discovery thread crawls the website and fills two
 * queues of candidate URLs, while a fixed number of collector threads take
 * URLs from those queues and try to parse them as newsfeeds. State that is
 * shared between these threads is either <code>volatile</code> or only
 * modified while holding an internal lock.
 * </p>
 * 
 * @author <a href="mailto:bpasero@rssowl.org">Benjamin Pasero </a>
 * @version 1.2.3
 */
public class FeedDiscoveryManager {

  /** Number of collector threads */
  private static final int COLLECTOR_THREAD_COUNT = 7;

  /** Collector threads to collect and parse links from the results of the search */
  private ExtendedThread collectorThreads[];

  /** Thread that performs the search */
  private Thread discoveryThread;

  /** List of links (as Strings) that have already been queued for checking */
  private Vector links;

  /** Count the resulting Feeds (only modified while holding stateLock) */
  private int resultCounter;

  /**
   * Lock guarding the compound operations that are shared between the
   * collector threads: taking an URL from the queues and incrementing the
   * counters.
   */
  private final Object stateLock = new Object();

  /** Website to perform the search on */
  private URL webSiteURL;

  /**
   * Count the number of collector threads that have finished. Written by the
   * collector threads (under stateLock) and polled by the discovery thread,
   * therefore volatile.
   */
  volatile int collectorsDone;

  /** Synchronized list of normal URL objects */
  Vector normalUrlObjects;

  /** Create one parser for each Collector thread */
  SAXBuilder parser[] = new SAXBuilder[COLLECTOR_THREAD_COUNT];

  /** Synchronized list of preferred URL objects */
  Vector preferredUrlObjects;

  /** Random Number Generator for sleep Time of Threads */
  Random randomNumberGenerator;

  /** The opened rss feeddiscovery dialog */
  FeedDiscoveryDialog rssOwlFeedDiscoveryDialog;

  /**
   * Flag indicates the running threads. Read and written from several threads,
   * therefore volatile.
   */
  volatile boolean running;

  /**
   * Flag indicates when the search has finished. Written by the discovery
   * thread and polled by the collector threads, therefore volatile.
   */
  volatile boolean searchDone;

  /**
   * Instantiate a new FeedDiscoveryManager
   * 
   * @param webSiteURL The website to perform the search on
   * @param rssOwlFeedDiscoveryDialog This dialog called the
   * FeedDiscoveryManager
   * @throws MalformedURLException In case the URL is malformed
   */
  public FeedDiscoveryManager(String webSiteURL, FeedDiscoveryDialog rssOwlFeedDiscoveryDialog) throws MalformedURLException {
    this.webSiteURL = new URL(URLShop.canonicalize(webSiteURL));
    this.rssOwlFeedDiscoveryDialog = rssOwlFeedDiscoveryDialog;
    searchDone = false;
    running = false;
    collectorsDone = 0;
    resultCounter = 0;
    links = new Vector();
    collectorThreads = new ExtendedThread[COLLECTOR_THREAD_COUNT];
    randomNumberGenerator = new Random();

    /** Synchronized lists (Vector) used as queues of URLs to check */
    preferredUrlObjects = new Vector();
    normalUrlObjects = new Vector();

    /** Init XML Parsers */
    initXMLParsers();

    /** Also add website to list of possible newsfeeds */
    filterLink(webSiteURL);

    /** Create Threads */
    createSearchThread();
    createCollectorThreads();
  }

  /**
   * Get the state of the search
   * 
   * @return boolean TRUE if search is running
   */
  public boolean isRunning() {
    return running;
  }

  /** Start search- and collector threads */
  public void startSearch() {

    /** Must be set before the threads start, they check this flag */
    running = true;

    /** Start collector threads */
    for (int i = 0; i < COLLECTOR_THREAD_COUNT; i++)
      collectorThreads[i].start();

    /** Start search thread */
    discoveryThread.start();
  }

  /** Interrupt the search- and collector threads */
  public void stopSearch() {
    running = false;

    /** Interrupt collector threads */
    for (int i = 0; i < COLLECTOR_THREAD_COUNT; i++) {
      collectorThreads[i].stopThread();
      collectorThreads[i].interrupt();
    }

    /** Interrupt search thread */
    discoveryThread.interrupt();
  }

  /**
   * Build the XML document from the given URL
   * 
   * @param xmlUrl The XML Url object
   * @param forceDefaultEncoding If set to TRUE the encoding is not being
   * detected from the XML document and the OS default encoding is used
   * @param parserNumber The number of the index in the parser array to use
   * @return Document The built XML document from the URL or NULL if the search
   * was stopped, the content does not look like XML or an error occurred
   */
  private Document buildDocument(URL xmlUrl, boolean forceDefaultEncoding, int parserNumber) {
    Document document = null;

    /** Create a new Connection Manager */
    ConnectionManager connectionManager = new ConnectionManager(xmlUrl.toExternalForm());
    try {
      connectionManager.setShowLoginDialogIfRequired(false);
      connectionManager.connect();

      /** Retrieve the InputStream to the content */
      InputStream connectionInS = connectionManager.getInputStream();

      /** Search might have been stopped already */
      if (connectionInS == null || !running)
        return null;

      /** Check the content type if available */
      String contentType = URLConnection.guessContentTypeFromStream(connectionInS);

      /** In case content type is something else than application/xml */
      if (contentType != null && !contentType.equals("application/xml"))
        return null;

      /** Auto-Detect encoding from the XML declaration */
      if (!forceDefaultEncoding)
        document = parser[parserNumber].build(connectionInS);

      /** Use OS default encoding */
      else
        document = parser[parserNumber].build(new InputStreamReader(connectionInS));
    }

    /** Error while parsing the document */
    catch (JDOMException e) {
      /** Not a parseable document, so not a newsfeed. Do not display. */
    }

    /** UTF encoding exception */
    catch (UTFDataFormatException e) {

      /** Try parsing again with default encoding (only once) */
      if (!forceDefaultEncoding)
        return buildDocument(xmlUrl, true, parserNumber);
    }

    /** Error while connecting */
    catch (IOException e) {
      /** Link is not reachable, so not a newsfeed. Do not display. */
    }

    /** Error while parsing */
    catch (IllegalArgumentException e) {
      /** Do not display */
    }

    /** Close connection in any way */
    finally {
      connectionManager.closeConnection();
    }

    /** Return XML Newsfeed document */
    return document;
  }

  /**
   * Create the collector threads. Each collector thread checks if the URLs
   * that are queued in "preferredUrlObjects" and "normalUrlObjects" are RSS /
   * RDF / Atom feeds.
   */
  private void createCollectorThreads() {

    /** Create a number of threads */
    for (int i = 0; i < COLLECTOR_THREAD_COUNT; i++) {

      /** Remember number of the collector thread */
      final int threadNumber = i;

      /** Create new collector thread */
      collectorThreads[i] = new ExtendedThread("Feed Discovery Collector Thread #" + threadNumber) {

        /**
         * Each collector thread will get an URL from the search thread and
         * tries to parse the URL as newsfeed. In the case the URL is a valid
         * newsfeed it is added to the table as a result of the search
         */
        public void run() {
          try {

            /**
             * Also leave the loop when the search is no longer running.
             * Otherwise a thread that was not explicitly stopped would spin
             * without sleeping as long as URLs remain queued.
             */
            ThreadLoop: while (running && !isStopped() && !isInterrupted()) {

              /** Check new URL objects every second */
              while (preferredUrlObjects.size() == 0 && normalUrlObjects.size() == 0 && !searchDone && !isStopped()) {
                try {

                  /**
                   * To avoid that all collector threads wake up at the same
                   * time and look at the URL Objects list, we let the random
                   * number generator decide about the interval
                   */
                  sleep(500 + randomNumberGenerator.nextInt(500));
                } catch (InterruptedException e) {
                  break ThreadLoop;
                }
              }

              /** Interrupt if search is done and all links are parsed */
              if (preferredUrlObjects.size() == 0 && normalUrlObjects.size() == 0 && searchDone) {
                stopThread();
                interrupt();
              }

              /**
               * Take the next URL in one atomic step. Another collector may
               * have emptied the queues since the size was checked above, in
               * which case NULL is returned and the loop simply continues.
               */
              if (running) {
                URL nextUrl = pollNextUrl();
                if (nextUrl != null)
                  buildChannel(nextUrl, threadNumber);
              }
            }
          }

          /**
           * Tell that this thread is done, even if it terminates because of an
           * unexpected exception. The search thread waits until all collectors
           * have reported in.
           */
          finally {
            collectorFinished();
          }
        }
      };
      collectorThreads[i].setDaemon(true);
    }
  }

  /** Search thread performs the search for the topic */
  private void createSearchThread() {
    discoveryThread = new Thread("Feed Discovery Thread") {

      /**
       * The search thread performs the query for the given website and sends
       * all URLs that may be valid newsfeeds to the URL queues so that the
       * collector threads are able to check the URL
       */
      public void run() {
        try {

          /** Crawl the website and queue all links that were found */
          performSearch();

          /** The search has finished */
          searchDone = true;

          /** Wait until the collector threads have finished */
          while (collectorsDone != COLLECTOR_THREAD_COUNT) {
            try {
              sleep(200);
            } catch (InterruptedException e) {
              break;
            }
          }

          /** Show stopped Search in Dialog */
          rssOwlFeedDiscoveryDialog.setSearching(false);

          /**
           * User could have closed the application already or performed a new
           * search
           */
          if (GUI.isAlive() && !rssOwlFeedDiscoveryDialog.isSearching()) {

            /** Reset the buttons */
            GUI.display.asyncExec(new Runnable() {
              public void run() {
                rssOwlFeedDiscoveryDialog.setButtonState(false);
                rssOwlFeedDiscoveryDialog.stopStatusMessageAnimate();
                rssOwlFeedDiscoveryDialog.setMessage(GUI.i18n.getTranslation("LABEL_SEARCH_FINISHED"));
              }
            });
          }
          running = false;
        }

        /** Display an error message if connection failed */
        catch (IOException e) {

          /** Set flags */
          searchDone = true;
          running = false;

          if (!isInterrupted()) {

            /** Only perform this Runnable if RSSOwl was not closed */
            if (GUI.isAlive()) {
              GUI.display.asyncExec(new Runnable() {
                public void run() {

                  /** Display warning messages and reset buttons */
                  rssOwlFeedDiscoveryDialog.stopStatusMessageAnimate();
                  rssOwlFeedDiscoveryDialog.setErrorMessage(GUI.i18n.getTranslation("ERROR_CONNECTION_FAILED"));
                  rssOwlFeedDiscoveryDialog.setButtonState(false);
                  rssOwlFeedDiscoveryDialog.setSearching(false);
                }
              });
            }
          }
        }
      }
    };
    discoveryThread.setDaemon(true);
  }

  /**
   * Filter the given link. Do not add URLs which have a file name which marks
   * them as binary file. Create absolute links from relative ones.
   * 
   * @param url Any URL to filter
   * @throws MalformedURLException In case the URL is not valid
   */
  private void filterLink(String url) throws MalformedURLException {

    /** This malformed URL shows up very often, so remove it */
    if (url.equals("http://www"))
      return;

    /** If the URL shows one of the binary extensions, return */
    if (URLShop.looksLikeBinary(url))
      return;

    /** The URL could be a relative URL, resolve it */
    if (!RegExShop.isValidURL(url))
      url = new URL(webSiteURL, url).toExternalForm();

    /** Canonicalize URL if needed */
    url = URLShop.canonicalize(url);

    /** Replace common entity */
    url = StringShop.replaceAll(url, "&amp;", "&");

    /** Only add the URL if not yet added before */
    if (!links.contains(url) && !rssOwlFeedDiscoveryDialog.getResults().containsKey(url)) {

      /** Queue the URL object and remember its String value in "links" */
      URL urlObj = new URL(url);

      /** Prefer URLs with common newsfeed extension */
      if (URLShop.looksLikeNewsfeed(urlObj, false))
        preferredUrlObjects.add(urlObj);
      else
        normalUrlObjects.add(urlObj);

      links.add(url);
    }
  }

  /** Init one SAX Parser for each collector thread */
  private void initXMLParsers() {
    for (int a = 0; a < COLLECTOR_THREAD_COUNT; a++) {
      parser[a] = new SAXBuilder("org.apache.xerces.parsers.SAXParser");
      parser[a].setFeature(XMLShop.ALLOW_JAVA_ENCODING, true);
      XMLShop.setDefaultEntityResolver(parser[a]);
    }
  }

  /**
   * Build RSS channel object from the given URL
   * 
   * @param url The URL of the newsfeed
   * @param parserNumber The number of the index in the parser array to use
   */
  void buildChannel(URL url, int parserNumber) {

    /** Try to parse the URL with JDom, auto-detect encoding */
    Document document = buildDocument(url, false, parserNumber);

    /** Document must not be null */
    if (document == null)
      return;

    /** Parse the Channel */
    try {
      FeedParser feedParser = new FeedParser(document, url.toExternalForm());
      feedParser.parse();
      final Channel rssChannel = feedParser.getChannel();

      /** Channel must not be null */
      if (rssChannel == null)
        return;

      /**
       * Increment resulting feeds Counter. This method is called from all
       * collector threads, so the increment must not be interleaved.
       */
      synchronized (stateLock) {
        resultCounter++;
      }

      /** Cache the newsfeed if Feed is new */
      if (StringShop.isset(rssChannel.getLink()) && !Category.linkExists(rssChannel.getLink()))
        GUI.rssOwlGui.getFeedCacheManager().cacheNewsfeed(rssChannel.getLink(), rssChannel);

      /** Only perform this Runnable if RSSOwl was not closed */
      if (GUI.isAlive()) {
        GUI.display.asyncExec(new Runnable() {
          public void run() {

            /** Add new line if search is still running */
            if (running)
              rssOwlFeedDiscoveryDialog.addResultElement(rssChannel.getLink(), rssChannel.getTitle(), rssChannel.getFormat());
          }
        });
      }
    } catch (NewsfeedFactoryException e) {
      /** Error while parsing. Dont display. */
    }
  }

  /**
   * Called by each collector thread right before it terminates. The increment
   * is done under a lock, because several collector threads may finish at the
   * same time and a lost update would let the search thread wait forever.
   */
  void collectorFinished() {
    synchronized (stateLock) {
      collectorsDone++;
    }
  }

  /**
   * Perform a search on the given website and queue every link that was found.
   * Each link is first filtered, to see if it's not an image or zip for
   * example.
   * 
   * @throws IOException If connection fails
   */
  void performSearch() throws IOException {

    /** Crawl all Links from the given WebSite */
    Vector linkList = URLShop.crawlUrls(webSiteURL.toExternalForm());

    /** Filter each link that was found */
    Iterator linkListIt = linkList.iterator();
    while (linkListIt.hasNext()) {
      try {
        filterLink((String) linkListIt.next());
      } catch (MalformedURLException e) {
        /** Ignore Malformed URLs */
      }
    }
  }

  /**
   * Take the next URL that should be checked for a newsfeed. Preferred URLs
   * are returned before normal ones. Checking the size and removing the first
   * element happens in one atomic step with respect to all other collector
   * threads, so two threads can never compete for the same last element.
   * 
   * @return URL The next URL to check or NULL if both queues are empty
   */
  URL pollNextUrl() {
    synchronized (stateLock) {
      if (preferredUrlObjects.size() > 0)
        return (URL) preferredUrlObjects.remove(0);

      if (normalUrlObjects.size() > 0)
        return (URL) normalUrlObjects.remove(0);

      return null;
    }
  }
}