/***************************************************************************
 *   Copyright (C) 2004-2008 by Pere Constans
 *   constans@molspaces.com
 *   cb2Bib version 1.0.4. Licensed under the GNU GPL version 3.
 *   See the LICENSE file that comes with this distribution.
 ***************************************************************************/
#include "compositePattern.h"


/**
 * Constructs a composite pattern.
 *
 * @param newPattern Pattern text; it is stored after QString::simplified().
 * @param newCs      Case sensitivity requested by the caller; stored as is.
 */
compositePattern::compositePattern(const QString newPattern, Qt::CaseSensitivity newCs)
{
    // Remember the caller's settings
    caseSensitivity = newCs;
    pattern = newPattern.simplified();

    // Configure the internal regular expression. It is always set case
    // sensitive, for efficiency, regardless of the value of newCs.
    arx.setCaseSensitivity(Qt::CaseSensitive);
    arx.setMinimal(false);
    arx.setPatternSyntax(QRegExp::RegExp2);
}

/**
 * Destructor. No explicit cleanup is performed here.
 */
compositePattern::~compositePattern()
{
}


/**
 * Escapes a string so that it can be embedded in a regular expression.
 *
 * @param str Text to escape.
 * @param cs  With Qt::CaseSensitive the result is simply QRegExp::escape(str).
 *            Otherwise every letter is expanded to a two-character class
 *            holding its upper and lower case forms, e.g. 'a' gives "[Aa]",
 *            and every other character is escaped individually.
 * @return The escaped string.
 */
const QString compositePattern::escape(const QString& str, Qt::CaseSensitivity cs)
{
    // Case sensitive: plain escaping is all that is needed
    if (cs == Qt::CaseSensitive)
        return QRegExp::escape(str);

    // Case insensitive: spell out both cases of each letter
    QString escaped;
    const int length = str.length();
    for (int pos = 0; pos < length; ++pos)
    {
        const QChar ch = str.at(pos);
        if (!ch.isLetter())
        {
            escaped += QRegExp::escape(ch);
            continue;
        }
        escaped += QLatin1Char('[');
        escaped += ch.toUpper();
        escaped += ch.toLower();
        escaped += QLatin1Char(']');
    }
    return escaped;
}

double compositePattern::probability(const QString& str)
{
    // Estimate string probability
    // Frequencies as found on a 306945065 letter set
    double f = 1.0;
    for (int i = 0; i < str.length(); ++i)
    {
        if (!str.at(i).isLetter())
            continue;
        switch (str.at(i).toLower().toAscii())
        {
        case 'a':
            f *= 0.0770;
            break;
        case 'b':
            f *= 0.0160;
            break;
        case 'c':
            f *= 0.0391;
            break;
        case 'd':
            f *= 0.0337;
            break;
        case 'e':
            f *= 0.1196;
            break;
        case 'f':
            f *= 0.0248;
            break;
        case 'g':
            f *= 0.0174;
            break;
        case 'h':
            f *= 0.0407;
            break;
        case 'i':
            f *= 0.0785;
            break;
        case 'j':
            f *= 0.0033;
            break;
        case 'k':
            f *= 0.0054;
            break;
        case 'l':
            f *= 0.0427;
            break;
        case 'm':
            f *= 0.0288;
            break;
        case 'n':
            f *= 0.0727;
            break;
        case 'o':
            f *= 0.0733;
            break;
        case 'p':
            f *= 0.0255;
            break;
        case 'q':
            f *= 0.0034;
            break;
        case 'r':
            f *= 0.0628;
            break;
        case 's':
            f *= 0.0641;
            break;
        case 't':
            f *= 0.0911;
            break;
        case 'u':
            f *= 0.0269;
            break;
        case 'v':
            f *= 0.0114;
            break;
        case 'w':
            f *= 0.0121;
            break;
        case 'x':
            f *= 0.0076;
            break;
        case 'y':
            f *= 0.0143;
            break;
        case 'z':
            f *= 0.0027;
            break;
        default:
            f *= 0.0003; // Maximum found for a non ascii letter
            break;
        }
    }
    return f;
}
