/***************************************************************************
 *   Copyright (C) 2004-2008 by Pere Constans
 *   constans@molspaces.com
 *   cb2Bib version 1.0.4. Licensed under the GNU GPL version 3.
 *   See the LICENSE file that comes with this distribution.
 ***************************************************************************/
#include "c2bBibSearcherCache.h"
#include "c2bSettings.h"
#include "c2bUtils.h"

#include <QDir>
#include <QProcess>


// Creates a cache with no file loaded. Conversion settings (converter binary,
// arguments, cache directory) are not read here; load() fetches them.
c2bBibSearcherCache::c2bBibSearcherCache(QObject* parent) : QObject(parent)
{
    // Settings object published through c2bSettingsP; needed first, since the
    // scratch file path below is derived from its temporary directory.
    settings = c2bSettingsP;

    // Scratch file handed to the converter as its output target. The name is
    // appended directly to tmp_dir_path, without inserting a separator.
    p_text_file = settings->tmp_dir_path + "cb2bib_pdfconv.tmp";
    p_simplify_source = false;

    // Nothing is loaded yet. The entry count recorded here is what save()
    // later compares against to decide whether there is anything to write.
    load_filename = "";
    cache_size = fileContents.size();
}

// Flushes the in-memory cache on destruction; save() itself decides whether
// anything actually needs to be written.
c2bBibSearcherCache::~c2bBibSearcherCache()
{
    this->save();
}


void c2bBibSearcherCache::load(const QString& filename, bool simplify_source)
{
    p_simplify_source = simplify_source;
    p_pdf2txt_bin = settings->fileName("c2bBibSearcher/Pdf2TextBin");
    p_pdf2txt_arg = settings->value("c2bBibSearcher/Pdf2TextArg").toString();
    p_pdf2txt_arg = p_pdf2txt_arg.simplified();
    p_cache_dir = settings->fileName("cb2Bib/CacheDirectory");

    if (filename == load_filename)
        return;
    load_filename = filename;
    save();
    fileContents.clear();
    if (QFileInfo(p_cache_dir).exists() && QFileInfo(p_cache_dir).isDir())
        cache_filename = QDir::cleanPath(p_cache_dir + QDir::separator() + QFileInfo(load_filename).fileName() + ".c2b");
    else
        cache_filename = load_filename.trimmed() + ".c2b";
    if (QFileInfo(cache_filename).exists())
    {
        QFile file(cache_filename);
        file.open(QIODevice::ReadOnly);
        QDataStream ds(&file);
        ds >> fileContents;
    }
    cache_size = fileContents.size();
}

void c2bBibSearcherCache::save()
{
    if (cache_size != fileContents.size())
    {
        QFile file(cache_filename);
        file.open(QIODevice::WriteOnly);
        QDataStream ds(&file);
        ds << fileContents;
    }
}

// Returns the text of the document 'filename', converting it with the
// configured external tool on the first request and serving it from the
// cache afterwards.
//
// filename      Path of the document; surrounding whitespace is ignored.
// pdfCounter    Incremented for every existing file that is requested.
// logString     Receives a message line for each failure.
// errorCounter  Incremented for a missing file and for a failed conversion.
//
// The three pointers are dereferenced unconditionally and must be valid.
//
// Returns the plain text, or its simplified form when the cache was loaded
// with simplify_source set. Returns an empty string for a blank filename, a
// missing file, or a failed conversion. Both text variants are stored in the
// cache after a successful conversion.
const QString c2bBibSearcherCache::pdfToText(const QString& filename, int* pdfCounter, QString* logString, int* errorCounter)
{
    QString pdf_file = filename.trimmed();
    if (pdf_file.isEmpty())
        return QString();
    QFileInfo fi(pdf_file);
    if (!fi.exists() || !fi.isFile())
    {
        (*errorCounter)++;
        (*logString) += tr("% [cb2bib] File %1 does not exists\n").arg(pdf_file);
        return QString();
    }

    (*pdfCounter)++;

    // Simplified text is cached under a "simp:" prefixed key, plain text under the bare path
    const QString simp_key = "simp:" + pdf_file;
    const QString cache_key = p_simplify_source ? simp_key : pdf_file;
    if (fileContents.contains(cache_key))
        return QString::fromUtf8(qUncompress(fileContents.value(cache_key)));

    // Converter command line: configured arguments, input file, output text file
    QProcess pdf2txt;
    QStringList arglist = p_pdf2txt_arg.split(" ", QString::SkipEmptyParts);
    arglist.append(pdf_file);
    arglist.append(p_text_file);
    pdf2txt.start(p_pdf2txt_bin, arglist);

    const bool finished = pdf2txt.waitForFinished(150000);
    if (!finished)
    {
        // Timed out (or never started): terminate and wait for the process to
        // go away, so that it no longer writes to the output file
        pdf2txt.kill();
        pdf2txt.waitForFinished();
    }

    // The output file is read in every case, as before. NOTE(review): the second
    // argument presumably asks fileToString to remove the file afterwards - confirm.
    QString text = c2bUtils::fileToString(p_text_file, true);

    // A timed out conversion counts as a failure, so partial output is never cached
    if (!finished || pdf2txt.exitStatus() != QProcess::NormalExit || text.isEmpty())
    {
        (*errorCounter)++;
        // Multi-argument arg() substitutes in a single pass, so a '%N' sequence
        // inside a path or argument string cannot be mistaken for a place marker
        (*logString) += tr("% [cb2bib] [%1 %2] Conversion failed for file %3\n").arg(p_pdf2txt_bin, p_pdf2txt_arg, pdf_file);
        return QString();
    }

    text.remove(QRegExp("-\\s*[\\n\\r]\\s*(?=\\w)")); // Join hyphenated words
    text.replace(QRegExp("[\\n\\r]"), " ");
    text.replace(QChar(0), ' '); // Remove null characters (avoids truncation in toUtf8, djvutxt places them at the end of page)
    text = text.simplified();
    fileContents.insert(pdf_file, qCompress(QByteArray(text.toUtf8())));
    QString simp_text = c2bUtils::c2bToAscii(text, true);
    fileContents.insert(simp_key, qCompress(QByteArray(simp_text.toUtf8())));
    if (p_simplify_source)
        return simp_text;
    else
        return text;
}
