/***************************************************************************
 *   Copyright (C) 2004-2008 by Pere Constans
 *   constans@molspaces.com
 *   cb2Bib version 1.0.4. Licensed under the GNU GPL version 3.
 *   See the LICENSE file that comes with this distribution.
 ***************************************************************************/
#include "bibParser.h"

#include "c2bSettings.h"
#include "c2bUtils.h"


bibParser::bibParser(QObject* parent) : QObject(parent)
{
    // Fill the static tables: reference fields and reference types.
    // Each call populates its own list, so their relative order is free.
    setFields();
    setBibTypes();

    // The regular expressions are assembled from the field list,
    // hence they are set once the fields are in place
    setRegularExpressions();

    // Month list, released in the destructor
    MDB = new monthDB();
}

bibParser::~bibParser()
{
    // Release the month list allocated in the constructor
    delete MDB;
}


QString bibParser::medlToc2b(const QString& au)
{
    // Preprocess Author from Medline 'AAAAAAA BB' to 'Aaaaaaa, BB'
    // which can be unambiguously translated to 'B. B. Aaaaaaa'
    // Takes care of cb2Bib included prefixes and suffixes
    //
    // au: one author name; its first whitespace separated token is taken as last name
    // Returns the last name (plus suffix, if any), a comma, and the remaining tokens

    QString FullN = au.simplified();
    FullN.remove(",");
    // Attention: prefixes and suffixes come from astring
    // Glue a prefix to the token that follows it (eg, 'van Gogh' -> 'van_Gogh'),
    // so that the split below keeps both in a single token
    FullN.replace(QRegExp("\\b(da|de|del|der|di|do|dos|van|vande|von)\\s", Qt::CaseInsensitive), "\\1+");
    FullN.replace("+", "_");
    QStringList list = FullN.split(QRegExp("[,\\s]"));
    QString LastN = list.first();
    // An empty name has no first character to capitalize
    if (!LastN.isEmpty() && c2bUtils::isUpperCaseString(LastN))
    {
        LastN = LastN.toLower();
        LastN[0] = LastN.at(0).toUpper();
        // Capitalize the character following the first hyphen or apostrophe, provided
        // the separator is not leading. A separator closing the name has nothing after
        // it, and indexing there would be out of range
        const int ii = LastN.indexOf(QRegExp("[-']")) + 1;
        if (ii > 1 && ii < LastN.length())
            LastN[ii] = LastN.at(ii).toUpper();
    }
    // Remaining tokens, each one preceded by a space
    QString FirstN = "";
    for (int i = 1; i < list.count(); ++i)
        FirstN += " " + list.at(i);
    // A suffix in the last token is moved next to the last name,
    // written as II/III in place of 2nd/3rd, and dropped from the first names
    QString isSuffix = list.last();
    if (isSuffix.contains(QRegExp("\\b(2nd|3rd|Jr|II|III)\\b")))
    {
        isSuffix.replace(QRegExp("\\b2nd\\b"), "II");
        isSuffix.replace(QRegExp("\\b3rd\\b"), "III");
        LastN += " " + isSuffix;
        FirstN.remove(QRegExp("\\b(2nd|3rd|Jr|II|III)\\b"));
    }
    // Attention: prefixes and suffixes also appear in authorString::toBib
    // Undo the gluing done above
    LastN.replace(QRegExp("(da|de|del|der|di|do|dos|van|vande|von)_", Qt::CaseInsensitive), "\\1 ");
    FullN = LastN + "," + FirstN;
    return FullN;
}

QString bibParser::setPages(const QString& pp)
{
    // Normalizes a page specification to 'first - last'.
    // Any non-word character is taken as a separator between page tokens.
    // Finally, ' - ' is replaced by the user defined page number separator, if set.

    QString pages = pp;
    pages.replace(QRegExp("\\W"), " ");
    pages = pages.simplified();
    pages.replace(" ", " - ");

    QRegExp range("^(\\d+)\\s*-\\s*(\\d+)$");
    QRegExp count("^(\\d+)\\s*-\\s*(\\d+)pp$");
    if (range.indexIn(pages) > -1)
    {
        // Abbreviated last page: complete it with the leading digits
        // of the first page, eg, 123-7  ->  123 - 127
        const QString first = range.cap(1);
        QString last = range.cap(2);
        const int missing = first.length() - last.length();
        if (missing > 0)
            last.prepend(first.left(missing));
        pages = first + " - " + last;
    }
    else if (count.indexIn(pages) > -1)
    {
        // First page and number of pages, eg, 123-7pp  ->  123 - 129
        const QString first = count.cap(1);
        const int span = count.cap(2).toInt() - 1;
        pages = first;
        if (span > 0)
            pages += " - " + QString::number(first.toInt() + span);
    }

    const QString separator = c2bSettingsP->value("cb2Bib/PageNumberSeparator").toString();
    if (!separator.isEmpty())
        pages.replace(" - ", separator);
    return pages;
}

void bibParser::setFields()
{
    // Reference fields, appended to bibFieldList in the order given here
    static const char* const fields[] =
    {
        "title", "author", "journal", "booktitle", "series", "chapter", "pages",
        "volume", "number", "edition", "institution", "organization", "school", "address",
        "month", "year", "editor", "publisher", "abstract", "keywords", "isbn",
        "issn", "doi", "eprint", "file", "url", "note", "annote"
    };
    const int nfields = sizeof(fields) / sizeof(fields[0]);
    for (int i = 0; i < nfields; ++i)
        bibFieldList << fields[i];

    // Sorted copy of the field list
    bibFieldSortedList = bibFieldList;
    qSort(bibFieldSortedList);
}

void bibParser::setBibTypes()
{
    // Reference types, appended to TypesList in the order given here.
    // The list opens with an empty type.
    static const char* const types[] =
    {
        "", "article", "book", "booklet", "conference", "inbook", "incollection",
        "inproceedings", "manual", "mastersthesis", "misc", "periodical", "phdthesis",
        "proceedings", "techreport", "unpublished"
    };
    const int ntypes = sizeof(types) / sizeof(types[0]);
    for (int i = 0; i < ntypes; ++i)
        TypesList << types[i];
}

void bibParser::setRegularExpressions()
{
    field_re = QRegExp("\\b(" + bibFieldList.join("|") + ")\\b");
    bib_begin_re = QRegExp("@\\w+\\s*\\{");
    bib_begin0_re = QRegExp("^\\s*@\\w+\\s*\\{");
    bib_begin1_re = QRegExp("[\\r\\n]\\s*@\\w+\\s*\\{");
    bib_key_re = QRegExp("^@\\w+\\s*\\{\\s*([\\w:\\.-]+),");
    bib_type_re = QRegExp("^@(\\w+)\\s*\\{");

    // List of regular expressions for extracting bib fields
    QStringList::Iterator it = bibFieldList.begin();
    while (it != bibFieldList.end())
    {
        QRegExp bf("\\b" + *it + "\\s*=\\s*[\\{\"]", Qt::CaseInsensitive);
        bf.setMinimal(true);
        bib_fieldList_re.append(bf);
        // Consider non-braces case (eg. 'year = 2000,')
        QRegExp bfNB("\\b" + *it + "\\s*=\\s*(\\w*)\\s*\\,", Qt::CaseInsensitive);
        bfNB.setMinimal(true);
        bib_fieldListNB_re.append(bfNB);
        it++;
    }
}

void bibParser::initReferenceParsing(const QString& fn, const QStringList& fields, bibReference* ref)
{
    // Set base dir for cases of relative 'file'
    bib_file_dir = QFileInfo(fn).absolutePath() + QDir::separator();
    // Init file parsing for given fields
    ref->clearFields();
    ref->clearReference();
    ref->bib_fieldList = fields;
    for (int i = 0; i < fields.count(); ++i)
    {
        QRegExp bf("\\b" + fields.at(i) + "\\s*=\\s*[\\{\"]", Qt::CaseInsensitive);
        bf.setMinimal(true);
        ref->bib_fieldList_re.append(bf);
        // Consider non-braces case (eg. 'year = 2000,')
        QRegExp bfNB("\\b" + fields.at(i) + "\\s*=\\s*(\\w*)\\s*\\,", Qt::CaseInsensitive);
        bfNB.setMinimal(true);
        ref->bib_fieldListNB_re.append(bfNB);
    }
}

const bibReference bibParser::wholeReference(const QString& str)
{
    // Parses the reference found in str for all known fields.
    // Returns an empty reference if referenceStarts() reports none.
    // keyName and positionValue are skipped, as they are not needed here.
    // Trimming is skipped too, it is done later in c2bBibParser::parse()
    bibReference ref;
    int pos = referenceStarts(str);
    if (pos < 0)
        return ref;

    QString entry = referenceAt(str, &pos);
    c2bUtils::fullBibToC2b(entry);
    entry = entry.simplified();

    // Reference type, in lower case
    bib_type_re.indexIn(entry);
    ref.typeName = bib_type_re.cap(1).toLower();

    // Replace the last character by ',}'; the non-braces expressions
    // require a comma after the value
    entry.replace(entry.length() - 1, 1, ",}");

    QString fvalue;
    for (int i = 0; i < bib_fieldList_re.count(); ++i)
    {
        // Delimited value first: read it from right after the opening delimiter
        QRegExp& delimited = bib_fieldList_re[i];
        const int at = delimited.indexIn(entry);
        if (at > 0)
        {
            if (inBraces(at + delimited.matchedLength(), entry, &fvalue))
                ref[bibFieldList.at(i)] = fvalue;
            continue;
        }

        // Otherwise, try the non-braces case (eg. 'year = 2000,')
        QRegExp& bare = bib_fieldListNB_re[i];
        if (bare.indexIn(entry) > -1)
            ref[bibFieldList.at(i)] = bare.cap(1);
    }
    return ref;
}
