/*   **********************************************************************  **
 **   Copyright notice                                                       **
 **                                                                          **
 **   (c) 2003-2006 RSSOwl Development Team                                  **
 **   http://www.rssowl.org/                                                 **
 **                                                                          **
 **   All rights reserved                                                    **
 **                                                                          **
 **   This program and the accompanying materials are made available under   **
 **   the terms of the Eclipse Public License 1.0 which accompanies this     **
 **   distribution, and is available at:                                     **
 **   http://www.rssowl.org/legal/epl-v10.html                               **
 **                                                                          **
 **   A copy is found in the file epl-v10.html and important notices to the  **
 **   license from the team is found in the textfile LICENSE.txt distributed **
 **   in this package.                                                       **
 **                                                                          **
 **   This copyright notice MUST APPEAR in all copies of the file!           **
 **                                                                          **
 **   Contributors:                                                          **
 **     RSSOwl - initial API and implementation (bpasero@rssowl.org)         **
 **                                                                          **
 **  **********************************************************************  */

package net.sourceforge.rssowl.controller.thread;

import net.sourceforge.rssowl.controller.GUI;
import net.sourceforge.rssowl.controller.dialog.FeedSearchDialog;
import net.sourceforge.rssowl.dao.ConnectionManager;
import net.sourceforge.rssowl.dao.NewsfeedFactoryException;
import net.sourceforge.rssowl.dao.feedparser.FeedParser;
import net.sourceforge.rssowl.model.Category;
import net.sourceforge.rssowl.model.Channel;
import net.sourceforge.rssowl.util.i18n.Dictionary;
import net.sourceforge.rssowl.util.shop.BrowserShop;
import net.sourceforge.rssowl.util.shop.FileShop;
import net.sourceforge.rssowl.util.shop.RegExShop;
import net.sourceforge.rssowl.util.shop.StringShop;
import net.sourceforge.rssowl.util.shop.URLShop;
import net.sourceforge.rssowl.util.shop.XMLShop;

import org.jdom.Document;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UTFDataFormatException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Iterator;
import java.util.Random;
import java.util.Vector;

/**
 * The FeedSearchManager performs a search for RSS / RDF feeds on a certain
 * topic. Each link that is in the result is checked for a valid feed. If the
 * link is a newsfeed, a new line is added to the table of the FeedSearchDialog.
 * 
 * @author <a href="mailto:bpasero@rssowl.org">Benjamin Pasero </a>
 * @version 1.2.3
 */
public class FeedSearchManager {

  /** Number of collector threads; one XML parser is created for each of them */
  private static final int COLLECTOR_THREAD_COUNT = 7;

  /**
   * Values accepted as locale-specific search domain. The lowercase country
   * code of the selected locale is looked up in this list when the search URL
   * is created.
   */
  private static final Vector localeSearchList = new Vector();

  /** These words are OR-combined and appended to the search of the user */
  private static final String[] SEARCH_DELIMITER = new String[] { "rss", "rdf", "atom", "newsfeed", "weblog", "blog" };

  /** Token for start parameter */
  private static final String START_PARAM = "&start=";

  /** Collector threads to collect and parse links from the results of the search */
  private ExtendedThread collectorThreads[];

  /**
   * If TRUE only links that look like binary files are dropped. If FALSE only
   * links that look like a newsfeed are kept.
   */
  private boolean intensiveSearch;

  /** List of links (as Strings) that have already been queued for checking */
  private Vector links;

  /** If TRUE only display feeds that match the selected language */
  private boolean nativeLangSearch;

  /** Count the resulting Feeds */
  private int resultCounter;

  /** Thread that performs the search */
  private Thread searchThread;

  /** URL to get the results from (without the start parameter) */
  private String searchUrl;

  /** Topic that the user searches for */
  private String topic;

  /**
   * Count the number of collector threads that have finished. Written by the
   * collector threads and polled by the search thread, therefore volatile.
   */
  volatile int collectorsDone;

  /** Synchronized list of normal URL objects */
  Vector normalUrlObjects;

  /** Create one parser for each Collector thread */
  SAXBuilder parser[] = new SAXBuilder[COLLECTOR_THREAD_COUNT];

  /** Synchronized list of preferred URL objects */
  Vector preferredUrlObjects;

  /** Random Number Generator for sleep Time of Threads */
  Random randomNumberGenerator;

  /** The opened rss feedsearch dialog */
  FeedSearchDialog rssOwlFeedSearchDialog;

  /**
   * Flag indicates the running threads. Read and written by several threads,
   * therefore volatile.
   */
  volatile boolean running;

  /**
   * Flag indicates when the search has finished. Set by the search thread and
   * polled by the collector threads, therefore volatile.
   */
  volatile boolean searchDone;

  /**
   * Create a FeedSearchManager for one search. The search URL, the XML
   * parsers and all threads are prepared here; no thread is started before
   * startSearch() is called.
   * 
   * @param topic Topic to search for
   * @param rssOwlFeedSearchDialog This dialog called the FeedSearchManager
   * @param intensiveSearch TRUE if intensive search is performed
   * @param nativeLangSearch TRUE if native lang search is performed
   */
  public FeedSearchManager(String topic, FeedSearchDialog rssOwlFeedSearchDialog, boolean intensiveSearch, boolean nativeLangSearch) {

    /** Remember the settings of this search */
    this.topic = topic;
    this.rssOwlFeedSearchDialog = rssOwlFeedSearchDialog;
    this.intensiveSearch = intensiveSearch;
    this.nativeLangSearch = nativeLangSearch;

    /** Nothing has happened yet */
    this.running = false;
    this.searchDone = false;
    this.collectorsDone = 0;
    this.resultCounter = 0;

    /** Vectors are used as synchronized lists shared between the threads */
    this.links = new Vector();
    this.preferredUrlObjects = new Vector();
    this.normalUrlObjects = new Vector();

    this.randomNumberGenerator = new Random();
    this.collectorThreads = new ExtendedThread[COLLECTOR_THREAD_COUNT];

    /** The search URL is built from the topic and the language setting */
    this.searchUrl = createUrl();

    /** One XML parser per collector thread */
    initXMLParsers();

    /** Prepare the search thread and the collector threads */
    createSearchThread();
    createCollectorThreads();
  }

  /**
   * Append a value to the locale search list. The list is consulted when the
   * search URL is created.
   * 
   * @param value The value to add
   */
  public static void addIntoLocaleSearchList(String value) {
    localeSearchList.addElement(value);
  }

  /**
   * Tell whether the search is currently marked as running. The flag is set
   * by startSearch() and reset by stopSearch() or by the search thread when
   * it has finished.
   * 
   * @return boolean TRUE if search is running
   */
  public boolean isRunning() {
    return this.running;
  }

  /**
   * Mark the search as running and start all threads: first every collector
   * thread, then the search thread.
   */
  public void startSearch() {
    running = true;

    /** Collectors first, so they are waiting when the first links arrive */
    for (int index = 0; index < collectorThreads.length; index++) {
      ExtendedThread collector = collectorThreads[index];
      collector.start();
    }

    /** Now begin searching */
    searchThread.start();
  }

  /**
   * Mark the search as no longer running, ask every collector thread to stop
   * and interrupt it, and finally interrupt the search thread.
   */
  public void stopSearch() {
    running = false;

    /** Stop and wake up each collector thread */
    for (int index = 0; index < collectorThreads.length; index++) {
      ExtendedThread collector = collectorThreads[index];
      collector.stopThread();
      collector.interrupt();
    }

    /** Wake up the search thread */
    searchThread.interrupt();
  }

  /**
   * Connect to the given URL and build a XML document from its content.
   * Nothing is reported to the user on failure; NULL is returned instead.
   * 
   * @param xmlUrl The XML Url object
   * @param forceDefaultEncoding If set to TRUE the encoding is not being
   * detected from the XML document and the OS default encoding is used
   * @param parserNumber The number of the index in the parser array to use
   * @return Document The built XML document from the URL, or NULL if the
   * search is not running, no stream is available, the content is not
   * guessed to be XML or any error occurred
   */
  private Document buildDocument(URL xmlUrl, boolean forceDefaultEncoding, int parserNumber) {
    ConnectionManager connection = new ConnectionManager(xmlUrl.toExternalForm());
    try {
      connection.setShowLoginDialogIfRequired(false);
      connection.setUserAgent(BrowserShop.getUserAgent());
      connection.connect();

      /** No stream available or search has been stopped already */
      InputStream feedStream = connection.getInputStream();
      if (feedStream == null || !running)
        return null;

      /** A guessed content type other than application/xml is skipped */
      String guessedType = URLConnection.guessContentTypeFromStream(feedStream);
      if (guessedType != null && !guessedType.equals("application/xml"))
        return null;

      /** Use OS default encoding */
      if (forceDefaultEncoding)
        return parser[parserNumber].build(new InputStreamReader(feedStream));

      /** Auto-Detect encoding from the XML declaration */
      return parser[parserNumber].build(feedStream);
    }

    /** Error while parsing the document: Do not display */
    catch (JDOMException e) {
      return null;
    }

    /** UTF encoding exception: Try once more with the default encoding */
    catch (UTFDataFormatException e) {
      if (forceDefaultEncoding)
        return null;

      return buildDocument(xmlUrl, true, parserNumber);
    }

    /** Error while connecting: Do not display */
    catch (IOException e) {
      return null;
    }

    /** Error while parsing: Do not display */
    catch (IllegalArgumentException e) {
      return null;
    }

    /** Close connection in any way */
    finally {
      connection.closeConnection();
    }
  }

  /**
   * Create the collector threads. Each collector thread takes URLs from the
   * preferred and normal URL lists and checks if they are valid newsfeeds.
   * The threads are created as daemon threads and are not started here.
   */
  private void createCollectorThreads() {

    /** Create a number of threads */
    for (int i = 0; i < COLLECTOR_THREAD_COUNT; i++) {

      /** Remember number of the collector thread */
      final int threadNumber = i;

      /** Create new collector thread */
      collectorThreads[i] = new ExtendedThread("Feed Search Collector Thread #" + threadNumber) {

        /**
         * Each collector thread will get an URL from the search thread and
         * tries to parse the URL as newsfeed. In the case the URL is a valid
         * newsfeed it is added to the table as a result of the search
         */
        public void run() {
          try {
            ThreadLoop: while (!isStopped() && !isInterrupted()) {

              /** Wait until URL objects are available or the search is done */
              while (preferredUrlObjects.size() == 0 && normalUrlObjects.size() == 0 && !searchDone && !isStopped()) {
                try {

                  /**
                   * To avoid that all collector threads wake up at the same
                   * time and look at the URL Objects list, we let the random
                   * number generator decide about the interval
                   */
                  sleep(500 + randomNumberGenerator.nextInt(500));
                } catch (InterruptedException e) {
                  break ThreadLoop;
                }
              }

              /**
               * The search is no longer running. Leave the loop, otherwise
               * this thread would spin without pause as long as unprocessed
               * URLs remain in the lists.
               */
              if (!running)
                break;

              /**
               * Read the flag before looking at the lists: If the search was
               * done already and no URL is left afterwards, no further URL
               * is going to be added.
               */
              boolean noMoreUrlsExpected = searchDone;

              /** Atomically take the next URL, preferred ones first */
              URL nextUrl = nextUrlObject();

              if (nextUrl != null)
                buildChannel(nextUrl, threadNumber);

              /** Interrupt if search is done and all links are parsed */
              else if (noMoreUrlsExpected) {
                stopThread();
                interrupt();
              }
            }
          }

          /**
           * Tell that this thread is done, even if it terminates abnormally.
           * The search thread waits until all collectors have counted
           * themselves, so the increment must not get lost.
           */
          finally {
            synchronized (FeedSearchManager.this) {
              collectorsDone++;
            }
          }
        }
      };
      collectorThreads[i].setDaemon(true);
    }
  }

  /**
   * Remove and return the next URL to check. Checking the size and removing
   * the element is done while holding the lock of the list, so that two
   * collector threads never compete for the same last element.
   * 
   * @return URL The first preferred URL, else the first normal URL, or NULL
   * if both lists are empty
   */
  URL nextUrlObject() {
    synchronized (preferredUrlObjects) {
      if (preferredUrlObjects.size() > 0)
        return (URL) preferredUrlObjects.remove(0);
    }

    synchronized (normalUrlObjects) {
      if (normalUrlObjects.size() > 0)
        return (URL) normalUrlObjects.remove(0);
    }

    return null;
  }

  /**
   * Create the search thread. The thread is created as daemon thread and is
   * not started here.
   */
  private void createSearchThread() {
    Thread worker = new Thread("Feed Search Thread") {

      /**
       * Perform the query for the topic, wait for the collector threads to
       * finish and update the dialog. A failed connection is reported as
       * error in the dialog unless this thread was interrupted.
       */
      public void run() {
        try {

          /** Run through the search engine with the search topic */
          performSearch();
          searchDone = true;

          /** Poll until all collector threads are done or on interruption */
          boolean waitAborted = false;
          while (!waitAborted && collectorsDone != COLLECTOR_THREAD_COUNT) {
            try {
              sleep(200);
            } catch (InterruptedException e) {
              waitAborted = true;
            }
          }

          /** Show stopped Search in Dialog */
          rssOwlFeedSearchDialog.setSearching(false);

          /** Resets the buttons and shows the finished message */
          Runnable finishedNotifier = new Runnable() {
            public void run() {
              rssOwlFeedSearchDialog.setButtonState(false);
              rssOwlFeedSearchDialog.stopStatusMessageAnimate();
              rssOwlFeedSearchDialog.setMessage(GUI.i18n.getTranslation("LABEL_SEARCH_FINISHED"));
            }
          };

          /**
           * User could have closed the application already or performed a new
           * search
           */
          if (GUI.isAlive() && !rssOwlFeedSearchDialog.isSearching())
            GUI.display.asyncExec(finishedNotifier);

          running = false;
        }

        /** Display an error message if connection failed */
        catch (IOException e) {
          searchDone = true;
          running = false;

          /** Stay silent if interrupted or if RSSOwl was closed */
          if (isInterrupted() || !GUI.isAlive())
            return;

          /** Display warning messages and reset buttons */
          GUI.display.asyncExec(new Runnable() {
            public void run() {
              rssOwlFeedSearchDialog.stopStatusMessageAnimate();
              rssOwlFeedSearchDialog.setErrorMessage(GUI.i18n.getTranslation("ERROR_CONNECTION_FAILED"));
              rssOwlFeedSearchDialog.setButtonState(false);
              rssOwlFeedSearchDialog.setSearching(false);
            }
          });
        }
      }
    };
    worker.setDaemon(true);
    searchThread = worker;
  }

  /**
   * Create the URL to perform the search. The URL-encoded topic is extended
   * by the OR-combined search delimiters. The "com" domain and "en" language
   * are used unless the country of the selected locale is contained in the
   * locale search list. The start parameter is not part of the returned URL.
   * 
   * @return String search URL
   */
  private String createUrl() {

    /** Extend the search value from the list of delimiters */
    StringBuffer searchValue = new StringBuffer();
    searchValue.append(URLShop.urlEncode(topic));
    searchValue.append("+").append(SEARCH_DELIMITER[0]);
    for (int a = 1; a < SEARCH_DELIMITER.length; a++)
      searchValue.append("+OR+").append(SEARCH_DELIMITER[a]);

    /** Per default use com */
    String domain = "com";
    String language = "en";

    /**
     * Try to use a locale domain if available. The case conversion must not
     * depend on the default locale of the system: With a turkish default
     * locale "IT" would otherwise become a string with a dotless i.
     */
    String country = Dictionary.selectedLocale.getCountry().toLowerCase(java.util.Locale.ENGLISH);
    if (localeSearchList.contains(country)) {
      domain = country;
      language = Dictionary.selectedLocale.getLanguage().toLowerCase(java.util.Locale.ENGLISH);
    }

    /** Common part of the search URL */
    StringBuffer url = new StringBuffer("http://www.google.");
    url.append(domain).append("/search?q=").append(searchValue.toString());
    url.append("&ie=UTF-8&oe=UTF-8&hl=").append(language).append("&num=10");

    /** If native language search, restrict results to the selected language */
    if (nativeLangSearch)
      url.append("&meta=lr=lang_").append(Dictionary.selectedLanguage);

    return url.toString();
  }

  /**
   * Decide whether the given link is worth checking and, if so, queue it for
   * the collector threads. Links that were queued before or that are already
   * part of the results of the dialog are ignored, as well as links that are
   * not valid URLs.
   * 
   * @param url Any URL to filter
   */
  private void filterLink(String url) {

    /** This malformed URL shows up very often, so remove it */
    if (url.equals("http://www"))
      return;

    /** Skip links back to the search or to the translation service */
    boolean isSearchLink = url.indexOf("/search?q=") >= 0;
    if (isSearchLink || url.indexOf("/translate?hl=") >= 0)
      return;

    if (intensiveSearch) {

      /** Intensive method: Just filter URLs with binary extensions */
      if (URLShop.looksLikeBinary(url))
        return;
    }

    /** Normal method: Only use URLs that have good extensions as filetype */
    else if (!URLShop.looksLikeNewsfeed(url, true)) {
      return;
    }

    /** Canonicalize URL and replace the common entity */
    String cleanUrl = StringShop.replaceAll(URLShop.canonicalize(url), "&amp;", "&");

    /** Nothing to do if the URL is known already */
    if (links.contains(cleanUrl) || rssOwlFeedSearchDialog.getResults().containsKey(cleanUrl))
      return;

    try {
      URL candidate = new URL(cleanUrl);

      /** Prefer URLs with common newsfeed extension */
      Vector targetList = URLShop.looksLikeNewsfeed(candidate, true) ? preferredUrlObjects : normalUrlObjects;
      targetList.add(candidate);

      /** Remember the String value to not queue it a second time */
      links.add(cleanUrl);
    }

    /** URL is not a valid url, ignore it */
    catch (MalformedURLException e) {
      return;
    }
  }

  /**
   * Fill the parser array with one configured SAX builder per collector
   * thread, so that no builder has to be shared between threads.
   */
  private void initXMLParsers() {
    for (int index = 0; index < parser.length; index++) {
      SAXBuilder builder = new SAXBuilder("org.apache.xerces.parsers.SAXParser");
      builder.setFeature(XMLShop.ALLOW_JAVA_ENCODING, true);
      XMLShop.setDefaultEntityResolver(builder);
      parser[index] = builder;
    }
  }

  /**
   * Try to interpret the given URL as newsfeed. If it is one (and matches
   * the selected language in case of a native language search), the feed is
   * cached when its link is new and a result line is added to the dialog.
   * URLs that are not a newsfeed are silently ignored.
   * 
   * @param url The URL of the newsfeed
   * @param parserNumber The number of the index in the parser array to use
   */
  void buildChannel(URL url, int parserNumber) {

    /** Try to parse the URL with JDom, auto-detect encoding */
    Document feedDocument = buildDocument(url, false, parserNumber);
    if (feedDocument == null)
      return;

    try {

      /** Parse the Channel */
      FeedParser feedParser = new FeedParser(feedDocument, url.toExternalForm());
      feedParser.parse();
      final Channel rssChannel = feedParser.getChannel();
      if (rssChannel == null)
        return;

      /** See for language if native lang search was performed */
      boolean languageAccepted = !nativeLangSearch || (rssChannel.getLanguage() != null && rssChannel.getLanguage().equals(Dictionary.selectedLanguage));
      if (!languageAccepted)
        return;

      /** Increment resulting feeds Counter */
      resultCounter++;

      /** Cache the newsfeed if Feed is new */
      boolean isNewFeed = StringShop.isset(rssChannel.getLink()) && !Category.linkExists(rssChannel.getLink());
      if (isNewFeed)
        GUI.rssOwlGui.getFeedCacheManager().cacheNewsfeed(rssChannel.getLink(), rssChannel);

      /** Only update the dialog if RSSOwl was not closed */
      if (!GUI.isAlive())
        return;

      GUI.display.asyncExec(new Runnable() {
        public void run() {

          /** Search might have been stopped in the meantime */
          if (!running)
            return;

          rssOwlFeedSearchDialog.addResultElement(rssChannel.getLink(), rssChannel.getTitle(), rssChannel.getFormat());
        }
      });
    } catch (NewsfeedFactoryException e) {
      /** Error while parsing. Dont display. */
    }
  }

  /**
   * Perform a search on the search engine and pass every link that was found
   * to the link filter, which queues suitable links for the collector
   * threads. The result pages are requested one after the other, advancing
   * the start parameter by 10 until it exceeds 1000 or until the search
   * thread is interrupted.
   * 
   * @throws IOException If connection fails
   */
  void performSearch() throws IOException {
    int start = 0;

    /**
     * In case the Topic looks like an URL, first Crawl any URLs from that
     * WebSite and also try to interpret the given URL as a Newsfeed.
     */
    if (URLShop.looksLikeURL(topic)) {

      /** Search for Links in the result */
      try {
        Vector linkList = URLShop.crawlUrls(topic);

        /** Filter each link that was found */
        Iterator linkListIt = linkList.iterator();
        while (linkListIt.hasNext())
          filterLink((String) linkListIt.next());
      } catch (IOException e) {
        /** Fail Silently in order to proceed */
      }

      /** Also Filter Topic as possible feed */
      filterLink(topic);
    }

    /** Request one result page after the other */
    do {

      /** Create a new search URL */
      String currentSearchUrl = searchUrl + START_PARAM + start;

      /** Create a new ConnectionManager */
      ConnectionManager connectionManager = new ConnectionManager(currentSearchUrl);
      String result;
      try {
        connectionManager.setShowLoginDialogIfRequired(false);
        connectionManager.setUserAgent(BrowserShop.getUserAgent());

        /** Connect */
        connectionManager.connect();

        /** Read entire content of the result page */
        result = FileShop.getContent(connectionManager.getInputStream());
      }

      /** Close connection in any way, also if connecting or reading failed */
      finally {
        connectionManager.closeConnection();
      }

      /** Remove Search-Engines formatting from Links */
      result = StringShop.replaceAll(result, "<b>", "");
      result = StringShop.replaceAll(result, "</b>", "");

      /** Search for Links in the result */
      Vector linkList = new Vector();
      RegExShop.extractLinksFromText(result, linkList);

      /** Filter each link that was found */
      Iterator linkListIt = linkList.iterator();
      while (linkListIt.hasNext()) {
        filterLink((String) linkListIt.next());
      }

      /** Try next result page */
      start += 10;

      /**
       * For performance reasons the search-thread will pause execution, while
       * the collector threads parse the links from the URL lists. When both
       * lists are empty again, the search thread continues. This ensures
       * that the lists never fill up faster than the available bandwidth
       * allows them to be parsed.
       */
      while (normalUrlObjects.size() > 0 || preferredUrlObjects.size() > 0) {
        try {
          Thread.sleep(500);
        } catch (InterruptedException e) {
          return;
        }
      }
    } while (start <= 1000 && !searchThread.isInterrupted());
  }
}