/*   **********************************************************************  **
 **   Copyright notice                                                       **
 **                                                                          **
 **   (c) 2003-2006 RSSOwl Development Team                                  **
 **   http://www.rssowl.org/                                                 **
 **                                                                          **
 **   All rights reserved                                                    **
 **                                                                          **
 **   This program and the accompanying materials are made available under   **
 **   the terms of the Eclipse Public License 1.0 which accompanies this     **
 **   distribution, and is available at:                                     **
 **   http://www.rssowl.org/legal/epl-v10.html                               **
 **                                                                          **
 **   A copy is found in the file epl-v10.html and important notices to the  **
 **   license from the team is found in the textfile LICENSE.txt distributed **
 **   in this package.                                                       **
 **                                                                          **
 **   This copyright notice MUST APPEAR in all copies of the file!           **
 **                                                                          **
 **   Contributors:                                                          **
 **     RSSOwl - initial API and implementation (bpasero@rssowl.org)         **
 **                                                                          **
 **  **********************************************************************  */

package net.sourceforge.rssowl.util.shop;

import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * Factory class for regular expression concerns in RSSOwl
 * 
 * @author <a href="mailto:bpasero@rssowl.org">Benjamin Pasero </a>
 * @version 1.2.3
 */
public class RegExShop {

  /** Constant to show that regex was compiled successfully */
  public static final String REGEX_OK = "OK";

  /**
   * The reg. expression to match the href attribute inside an HTML anchor
   * (&lt;a ...&gt;) or &lt;link ...&gt; tag. The tag name is a real
   * alternation followed by a whitespace, so that other tags carrying a href
   * attribute are not mistaken for anchors. The alternation is non-capturing:
   * group 3 is the link target.
   */
  private static final String HTML_ANCHOR_REGEX = "<(?:a|link)\\s([^>]*)href=('|\")?([^>\"'\\s]+)";

  /** The compiled pattern to match HTML Anchors (is Case Insensitive) */
  private static final Pattern HTML_ANCHOR_REGEX_PATTERN = Pattern.compile(HTML_ANCHOR_REGEX, Pattern.CASE_INSENSITIVE);

  /** The common RegEx special chars that get escaped by escapeRegEx() */
  private static final String REGEX_SPECIAL_CHARS = "\\+-?*.[](){}^$|";

  /**
   * The reg. expression for an URL. Either something beginning with "www", or
   * a http / ftp / https address. NOTE: The '.' in between the first two parts
   * of the host is not escaped and therefore matches any single character.
   * That is what lets addresses like "http://localhost:8080/feed" pass, so it
   * is kept as is.
   */
  private static final String URL_REGEX = "(www([\\wv\\-\\.,@?^=%&:/~\\+#]*[\\w\\-\\@?^=%&/~\\+#])?)|(http|ftp|https):\\/\\/[\\w]+(.[\\w]+)([\\wv\\-\\.,@?^=%&:/~\\+#]*[\\w\\-\\@?^=%&/~\\+#])?";

  /** The compiled pattern to match an URL */
  private static final Pattern URL_REGEX_PATTERN = Pattern.compile(URL_REGEX);

  /** The reg. expression to match Whitespaces */
  private static final String WHITESPACE_REGEX = "[\\s]+";

  /** The reg. expression for HTML tags */
  static final String HTML_TAG_REGEX = "<[^>]+>";

  /** The compiled pattern to match HTML tags */
  static final Pattern HTML_TAG_REGEX_PATTERN = Pattern.compile(HTML_TAG_REGEX);

  /** The compiled pattern to match Whitespaces */
  static final Pattern WHITESPACE_PATTERN = Pattern.compile(WHITESPACE_REGEX);

  /** This utility class constructor is hidden */
  private RegExShop() {
  // Protect default constructor
  }

  /**
   * Compile a String and return either "OK" if the String is a valid RegEx, or
   * the warning message if not.
   * 
   * @param str Any String interpreted as RegEx
   * @return String REGEX_OK if the String compiled, or the message of the
   * PatternSyntaxException otherwise
   */
  public static String compileRegEx(String str) {
    try {
      Pattern.compile(str);
    } catch (PatternSyntaxException e) {
      return e.getMessage();
    }
    return REGEX_OK;
  }

  /**
   * Escape RegEx special chars from the String and return it. Each of the
   * chars \ + - ? * . [ ] ( ) { } ^ $ | is prefixed with a backslash, all
   * other chars are copied unchanged.
   * 
   * @param str Any String, must not be NULL
   * @return String RegEx escaped String
   */
  public static String escapeRegEx(String str) {
    StringBuffer escaped = new StringBuffer(str.length() + 8);

    /** Single pass: A backslash added here is never looked at again */
    for (int i = 0; i < str.length(); i++) {
      char c = str.charAt(i);
      if (REGEX_SPECIAL_CHARS.indexOf(c) >= 0) {
        escaped.append('\\');
      }
      escaped.append(c);
    }

    return escaped.toString();
  }

  /**
   * Extract all links from the given String and add them to the given Vector.
   * Links that are already contained in the Vector are not added again. This
   * method considers relative links too. Only use this method if the Text is
   * using HTML links in anchor-tags (or link-tags).
   * 
   * @param text The String to search for links. Nothing is added in case it
   * is NULL
   * @param linkList The collection to fill
   */
  public static void extractLinksFromHTML(String text, Vector linkList) {
    if (text == null) {
      return;
    }

    /** Group 3 of the anchor pattern is the value of the href attribute */
    collectMatches(HTML_ANCHOR_REGEX_PATTERN.matcher(text), 3, linkList);
  }

  /**
   * Extract all links from the given String and add them to the given Vector.
   * Links that are already contained in the Vector are not added again. This
   * method will NOT consider relative links. Only use this method when you are
   * searching for absolute links in a text (which may also be HTML).
   * 
   * @param text The String to search for links. Nothing is added in case it
   * is NULL
   * @param linkList The collection to fill
   */
  public static void extractLinksFromText(String text, Vector linkList) {
    if (text == null) {
      return;
    }

    /** Group 0 is the entire matched URL */
    collectMatches(URL_REGEX_PATTERN.matcher(text), 0, linkList);
  }

  /**
   * Returns TRUE in case the needle is surrounded by tags or includes tags
   * inside the haystack
   * 
   * @param haystack The String to search for tags
   * @param needle The String to check if surrounded by tags
   * @return boolean TRUE in case the needle contains a '&lt;' or '&gt;', or
   * is found in between a '&lt;' and the next '&gt;' of the haystack. FALSE
   * otherwise, also if the needle is NULL
   */
  public static boolean isInsideTags(String haystack, String needle) {
    if (needle == null) {
      return false;
    }

    /** Quick check if needle already includes tags */
    if (needle.indexOf('<') >= 0 || needle.indexOf('>') >= 0) {
      return true;
    }

    if (haystack == null) {
      return false;
    }

    /** The needle is user text, so it has to be matched literally */
    Pattern pattern = Pattern.compile("<[^>]*" + escapeRegEx(needle) + "[^>]*>");
    return pattern.matcher(haystack).find();
  }

  /**
   * Check if the given URL is valid
   * 
   * @param url The URL to check
   * @return boolean TRUE if the entire String matches the URL pattern, FALSE
   * otherwise (also if the URL is NULL)
   */
  public static boolean isValidURL(String url) {
    if (url == null) {
      return false;
    }
    return URL_REGEX_PATTERN.matcher(url).matches();
  }

  /**
   * Walk over all matches and add the given group of each match to the list,
   * skipping empty values and values the list already contains.
   * 
   * @param match The matcher to take the matches from
   * @param group The number of the capturing group that holds the link
   * @param linkList The collection to fill
   */
  private static void collectMatches(Matcher match, int group, Vector linkList) {
    while (match.find()) {
      String link = match.group(group);
      if (link != null && link.length() > 0 && !linkList.contains(link)) {
        linkList.add(link);
      }
    }
  }
}