""" Publication and citations retrieval
    
*A. Kopmann, 6.2.17 (ak)*


Scope:
    Publications are once added to wordpressas a post or comment.
    Afterwards scopus will not change or modify anything any more.
    Update is completely in the resonsibility of the ufo users.

The operation of the script splits in four phases:
    - Read all publications for one or more author groups
      The groups are all defined in the configuration file
      The publications are stored in a local cache database
    - For all new publication a post in Wordpress is created.
      The post is added to the catogeries accouring to the matching
      author groups
    - For each publication the citations are requested and stored
      in the local cache database as well
    - For each new citation a Wordpress comment is created.


Todo:
    - add mail to author button
    - save full scopus data in the database
    - Add a script to save the data for all publications in the database!!!
      There was some problem before?!
    - Add scripts to check consistence in the database
      and fix problems if detected  
      E.g. search for wpcommentid == 0
      Check if, wp posts + comments are still availabe, display
      deleted entries
"""


# Configuration - Scopus

import datetime
import requests
import json
import os.path

from ak_scopus import get_scopus_list, get_scopus_data, get_scopus_refs


from ak_wordpress import wordpress_post_by_scopus, wordpress_comment_by_scopus, wordpress_get_post


# Mysql persistent data (Accout: scopus, $scopus$)
import pymysql.cursors
import pymysql

from config import *


# Summary
npubs = 0
nnewpubs= 0
ncites = 0
nnewcites = 0


def update_publications(authids,authname='',scopus_opts = '',max=0):
    """ Read publications of a list of authors and store in the database """


    print "=== Update of publications for the author group: " + authname
    #print str(authids)


    # Connect to the database
    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)

    # Request all publications of a list of authors (in one query)
    # Result: list of records with (scopus ids, eid, citedbycount)
    # The citation could be used later also by wordpress (may be via a plugin)

    publist = get_scopus_list(authids,scopus_opts,max)
    #publist = get_scopus_list(authids, scopus_opts, 3)
    #publist = get_scopus_list(authids, '(PUBYEAR AFT 2014)')
    print "Total number of publications: %d" % len(publist)
    #print publist


    # Save all publication to the publication database
    try:
        with connection.cursor() as cursor:
            for pub in publist:
                # 1 / Create a new records
                #print pub # Todo: strip the prefix SCOPUS_ID?!
                sql = "INSERT IGNORE INTO `publications` (`scopusid`,`eid`) VALUES (%s,%s)"
                cursor.execute(sql, (pub[0],pub[1]))
                    
                sql = "UPDATE `publications` SET `citedbycount` = %s WHERE `scopusid` = %s"
                cursor.execute(sql, (pub[2],pub[0]))

                # 2 / Add categories
                if len(authname) > 0:
                    catlist = []
                    sql = "SELECT categories FROM publications WHERE scopusid = %s"
                    cursor.execute(sql, (pub[0]))
                    result = cursor.fetchall()
                    if len(result) > 0:
                        #print "Categories %s" % result[0]['categories']
                        cat = result[0]['categories']

                    try:
                        catlist = json.loads(cat)
                    except TypeError:
			#print("No categories upto now")
			pass

                    if authname not in catlist:
                        catlist += [authname]
                
                    sql = "UPDATE `publications` SET `categories` = %s WHERE `scopusid` = %s"
                    cursor.execute(sql, (json.dumps(catlist),pub[0]))
                        

            # connection is not autocommit by default. So you must commit to save
            # your changes.
            connection.commit()

    finally:
        connection.close()


def update_citations():
    """ Read all citations and store in the citation table """

    print ""
    print "=== Update citatation of all publication in the database"
    
    # Connect to the database
    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)

    # Loop over the publications and read all citations from scopus

    # Todo: Shift to a separate script !?
    try:
        with connection.cursor() as cursor:
            # Read a single record
            sql = "SELECT wpid,eid,citedbycount,citesloaded FROM publications WHERE wpid > 0"
            cursor.execute(sql)
            result = cursor.fetchall()

            for pub in result:
                wpid = int(pub['wpid'])
                if pub['citedbycount'] is None:
                    citedbycount = 0
                else:
                    citedbycount = int(pub['citedbycount'])
                if pub['citesloaded'] is None:
                    citesloaded = 0
                else:
                    citesloaded = int(pub['citesloaded'])

                # read list of citations
                if pub['eid'] and (citedbycount > citesloaded):
                    
                    print "Processing %d = %s previously cited by %d"  % (wpid, pub['eid'], citesloaded)

                    data = get_scopus_refs(pub['eid'])
                    #print json.dumps(data,sort_keys=True,indent=4, separators=(',', ': '))
                    
                    n = len(data)
                    #print "Number of citations loaded for processing %d" % n
                    #print data
                    
                    if n > 0:
                        for pub in data:
                            #print pub['eid'] + '  ' + pub['dc:title']
                           
                            try:
                                pubstr = json.dumps(pub)
                            except TypeError:
                                print("Error serializing pub entry")

                            # save all comments to the database
                            # wirte complete scopus data of the article !?
                            sql = "INSERT IGNORE INTO `citations` (`wpid`,`scopusid`,`eid`,`scopusdata`) VALUES (%s,%s,%s,%s)"
                            cursor.execute(sql, (wpid,pub['dc:identifier'],pub['eid'],pubstr))
                            connection.commit()

                        # Update the number of cites for this article
                        if n > citesloaded:
                            print "New citations found %d -> %d" %(citesloaded,n)
                            sql = "UPDATE `publications` SET `citesloaded`=" + str(n) + " WHERE wpid=" + str(wpid)
                            #print sql
                            cursor.execute(sql)
                            connection.commit()


    finally:
        connection.close()


def update_wp_posts():
    """ Create wordpress posts for all entries that have none """

    global npubs
    global nnewpubs

    print ""
    print "=== Create posts for newly registered publication in scopus"

    # Connect to the database
    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
        

    # Todo: Shift to a separate script !?
    try:
        with connection.cursor() as cursor:
            # Read a single record
            sql = "SELECT wpid,eid,citedbycount,citesloaded FROM publications WHERE wpid > 0"
            cursor.execute(sql)
            result = cursor.fetchall()

            print "Total number of publications is %d" % len(result)
            npubs = len(result)
            #print "Npubs = %d" % npubs

            # Count all publications
            #sql = "SELECT COUNT(id) FROM publications"
            #cursor.execute(sql)
            #result = cursor.fetchall()
            #if len(result) > 0:
            #print result[0]['COUNT(id)']

            # Read a single record
            sql = "SELECT scopusid,categories FROM publications WHERE wpid IS NULL"
            cursor.execute(sql)
            result = cursor.fetchall()
            if len(result) > 0:
                print "Number of new publications is %d" % len(result)
                nnewpubs = len(result)
            else:
                print "Nothing new found"


        # Retrieve all information required for the wordpress page
        for pub in result:
            print "Processing " + pub['scopusid'] + " categories " + pub['categories']

            data = get_scopus_data(pub['scopusid'])
            #print json.dumps(data, sort_keys=True, indent=4, separators=(',', ': '))

            # Parse categories
            catlist = []
            try:
                catlist = json.loads(pub['categories'])
            except TypeError:
                print("No categories specified")

            wpid = wordpress_post_by_scopus(data, catlist)
            
            #print wpid
            #print pub['scopusid']

            # Warning: the resulting string uses double quotes (") so use
            # single quotes (') for the sql command
            datastr = json.dumps(data)
            #print datastr

            # Update publication database !!!
            with connection.cursor() as cursor:
                # Read a single record
                #sql = "UPDATE publications SET wpid=" + str(wpid) + ",scopusdata='" + datastr + "' WHERE scopusid = '" + pub['scopusid'] + "'"
                sql = "UPDATE publications SET wpid=" + str(wpid) + " WHERE scopusid = '" + pub['scopusid'] + "'"
                cursor.execute(sql)
                connection.commit()

    finally:
        connection.close()


def update_wp_comments():
    """ Create a new comment for newly found citations """
    
    global ncites
    global nnewcites
    
    print ""
    print "=== Create comments for newly registered citations in scopus"
    
    
    # Connect to the database
    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
    
    
    # Todo: Shift to a separate script !?
    try:
        with connection.cursor() as cursor:
            # Count all citations
            sql = "SELECT COUNT(id) FROM citations"
            cursor.execute(sql)
            result = cursor.fetchall()
            if len(result) > 0:
                ncites = result[0]['COUNT(id)']
                #print result[0]['COUNT(id)']

            # Read a single record
            sql = "SELECT id,wpid,scopusdata FROM citations WHERE wpcommentid IS NULL"
            cursor.execute(sql)
            result = cursor.fetchall()
            print "Number of new citations is %d" % len(result)
            nnewcites = len(result)

            for pub in result:
                wpid = int(pub['wpid'])
                print "Processing post " + str(wpid)
            
                data = []
                try:
                    data = json.loads(pub['scopusdata'])
                except TypeError:
                    print("Scopus data missing?!")

                # If the creation of the comment fails, the wpcommentid 0 is
                # written to the database. This means, there is no second try
                # to get this citations added.
                # All failed comments can be found by searching for wpcommentid = 0
                #
                wpcommentid = 0
                try:
                    wpcommentid = wordpress_comment_by_scopus(wpid, data)
                except:
                    print "Error: Submission of comment failed"

                sql = "UPDATE citations SET wpcommentid=" + str(wpcommentid) + " WHERE id = '" + str(pub['id']) + "'"
                cursor.execute(sql)
                connection.commit()

    finally:
        connection.close()


# Main

# Prevent sphinx from execution
if __name__ == "__main__": 


    start = datetime.datetime.now()

    print ""
    print "***********************************************"
    print "**** scopus-get-publications / " + start.strftime("%Y-%m-%d") + " *****"
    print "***********************************************"
    print ""


    # Update publaction database; search for new publications
    # Loop over all user groups defined in ak_scopus.py

    # Todo: Detect, if there is no access to scopus !!!
    #

    search_param = '(PUBYEAR AFT %d)' % (sc_start)

    for wp in sc_workgroups:
        update_publications(wp['authors'],wp['name'],search_param)

    update_wp_posts()

    # read all citations
    # Todo: read only new citations?!
    if sc_citations:
        update_citations()

        # loop over all cites and post comments to wordpress, when necessary
        # update database

        update_wp_comments()


    # Display summary
    end = datetime.datetime.now()
    print ""
    print "Summary: (see also logfile %s) " % log_file
    print "Date       = " + str(start)
    print "NPubs      = " + str(npubs)
    print "NNewPubs   = " + str(nnewpubs)
    if sc_citations:
        print "NCites     = " + str(ncites)
        print "NNewCites  = " + str(nnewcites)
    print "Runtime    = " + str(end - start)


    # Write summary to log file
    if not os.path.isfile(log_file):
        print "Create logfile " + log_file
        # Open file and write header
        log = open(log_file,"w")
        log.write(__file__ + "\n")
        log.write("\n")
        log.write("      Date\t    Time\tNPubs\tNNewP\tNCite\tNNewC\t          TRun\n")
        log.write("------------------------------------------------------------------------------\n")
        log.close()


    log = open(log_file,"a")
    if sc_citations:
        log.write("%s\t%s\t%5d\t%5d\t%5d\t%5d\t%s\n" % (start.strftime("%Y-%m-%d"),
                                        start.strftime("%H:%M:%S"),
                                        npubs,nnewpubs,ncites,nnewcites,str(end-start)))
    else:
        log.write("%s\t%s\t%5d\t%5d\t%5s\t%5s\t%s\n" % (start.strftime("%Y-%m-%d"),
                                        start.strftime("%H:%M:%S"),
                                        npubs,nnewpubs,"","",str(end-start)))

    log.close()

    # done