""" Publication and citations retrieval *A. Kopmann, 6.2.17 (ak)* Scope: Publications are once added to wordpressas a post or comment. Afterwards scopus will not change or modify anything any more. Update is completely in the resonsibility of the ufo users. The operation of the script splits in four phases: - Read all publications for one or more author groups The groups are all defined in the configuration file The publications are stored in a local cache database - For all new publication a post in Wordpress is created. The post is added to the catogeries accouring to the matching author groups - For each publication the citations are requested and stored in the local cache database as well - For each new citation a Wordpress comment is created. Todo: - add mail to author button - save full scopus data in the database - Add a script to save the data for all publications in the database!!! There was some problem before?! - Add scripts to check consistence in the database and fix problems if detected E.g. search for wpcommentid == 0 Check if, wp posts + comments are still availabe, display deleted entries """ # Configuration - Scopus import datetime import requests import json import os.path from ak_scopus import get_scopus_list, get_scopus_data, get_scopus_refs from ak_wordpress import wordpress_post_by_scopus, wordpress_comment_by_scopus, wordpress_get_post # Mysql persistent data (Accout: scopus, $scopus$) import pymysql.cursors import pymysql from config import * # Summary npubs = 0 nnewpubs= 0 ncites = 0 nnewcites = 0 def update_publications(authids,authname='',scopus_opts = '',max=0): """ Read publications of a list of authors and store in the database """ print "=== Update of publications for the author group: " + authname #print str(authids) # Connect to the database connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor) # Request all publications of a list of authors (in one query) # Result: list of records with (scopus ids, eid, citedbycount) # The citation could be used later also by wordpress (may be via a plugin) publist = get_scopus_list(authids,scopus_opts,max) #publist = get_scopus_list(authids, scopus_opts, 3) #publist = get_scopus_list(authids, '(PUBYEAR AFT 2014)') print "Total number of publications: %d" % len(publist) #print publist # Save all publication to the publication database try: with connection.cursor() as cursor: for pub in publist: # 1 / Create a new records #print pub # Todo: strip the prefix SCOPUS_ID?! sql = "INSERT IGNORE INTO `publications` (`scopusid`,`eid`) VALUES (%s,%s)" cursor.execute(sql, (pub[0],pub[1])) sql = "UPDATE `publications` SET `citedbycount` = %s WHERE `scopusid` = %s" cursor.execute(sql, (pub[2],pub[0])) # 2 / Add categories if len(authname) > 0: catlist = [] sql = "SELECT categories FROM publications WHERE scopusid = %s" cursor.execute(sql, (pub[0])) result = cursor.fetchall() if len(result) > 0: #print "Categories %s" % result[0]['categories'] cat = result[0]['categories'] try: catlist = json.loads(cat) except TypeError: #print("No categories upto now") pass if authname not in catlist: catlist += [authname] sql = "UPDATE `publications` SET `categories` = %s WHERE `scopusid` = %s" cursor.execute(sql, (json.dumps(catlist),pub[0])) # connection is not autocommit by default. So you must commit to save # your changes. 
def update_citations():
    """ Read all citations and store them in the citation table """

    print ""
    print "=== Update citations of all publications in the database"

    # Connect to the database
    connection = pymysql.connect(host=db_host, user=db_user, password=db_pw, db=db_name,
                                 charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)

    # Loop over the publications and read all citations from scopus
    # Todo: Shift to a separate script !?
    try:
        with connection.cursor() as cursor:
            # Read all publications that already have a wordpress post
            sql = "SELECT wpid,eid,citedbycount,citesloaded FROM publications WHERE wpid > 0"
            cursor.execute(sql)
            result = cursor.fetchall()

            for pub in result:
                wpid = int(pub['wpid'])
                if pub['citedbycount'] is None:
                    citedbycount = 0
                else:
                    citedbycount = int(pub['citedbycount'])
                if pub['citesloaded'] is None:
                    citesloaded = 0
                else:
                    citesloaded = int(pub['citesloaded'])

                # Read the list of citations
                if pub['eid'] and (citedbycount > citesloaded):
                    print "Processing %d = %s previously cited by %d" % (wpid, pub['eid'], citesloaded)

                    data = get_scopus_refs(pub['eid'])
                    #print json.dumps(data, sort_keys=True, indent=4, separators=(',', ': '))
                    n = len(data)
                    #print "Number of citations loaded for processing %d" % n
                    #print data

                    if n > 0:
                        for cite in data:
                            #print cite['eid'] + ' ' + cite['dc:title']
                            citestr = None
                            try:
                                citestr = json.dumps(cite)
                            except TypeError:
                                print("Error serializing citation entry")

                            # Save all citations to the database
                            # (write the complete scopus data of the citing article !?)
                            sql = "INSERT IGNORE INTO `citations` (`wpid`,`scopusid`,`eid`,`scopusdata`) VALUES (%s,%s,%s,%s)"
                            cursor.execute(sql, (wpid, cite['dc:identifier'], cite['eid'], citestr))

                        connection.commit()

                    # Update the number of cites loaded for this article
                    if n > citesloaded:
                        print "New citations found %d -> %d" % (citesloaded, n)
                        sql = "UPDATE `publications` SET `citesloaded`=" + str(n) + " WHERE wpid=" + str(wpid)
                        #print sql
                        cursor.execute(sql)
                        connection.commit()
    finally:
        connection.close()
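
# Sketch for the consistency checks mentioned in the Todo list above: a
# read-only report of publications whose scopus citation count does not match
# the number of citations already loaded, and of citations whose wordpress
# comment could not be created (wpcommentid = 0). It only uses tables and
# columns referenced elsewhere in this script and is not called from the main
# flow below; run it manually when needed.
def report_inconsistencies():
    """ Print publications with pending citations and citations with failed comments """
    connection = pymysql.connect(host=db_host, user=db_user, password=db_pw, db=db_name,
                                 charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)
    try:
        with connection.cursor() as cursor:
            # Publications where scopus reports more citations than are stored locally
            sql = ("SELECT wpid,eid,citedbycount,citesloaded FROM publications "
                   "WHERE wpid > 0 AND IFNULL(citedbycount,0) > IFNULL(citesloaded,0)")
            cursor.execute(sql)
            for pub in cursor.fetchall():
                print "Pending citations for post %d (%s): %s loaded of %s" % \
                    (int(pub['wpid']), pub['eid'], str(pub['citesloaded']), str(pub['citedbycount']))

            # Citations whose comment creation failed (wpcommentid was set to 0)
            sql = "SELECT id,wpid FROM citations WHERE wpcommentid = 0"
            cursor.execute(sql)
            for cite in cursor.fetchall():
                print "Failed comment for citation %d (post %d)" % (int(cite['id']), int(cite['wpid']))
    finally:
        connection.close()
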
def update_wp_posts():
    """ Create wordpress posts for all entries that have none """
    global npubs
    global nnewpubs

    print ""
    print "=== Create posts for newly registered publications in scopus"

    # Connect to the database
    connection = pymysql.connect(host=db_host, user=db_user, password=db_pw, db=db_name,
                                 charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)

    # Todo: Shift to a separate script !?
    try:
        with connection.cursor() as cursor:
            # Count the publications that already have a wordpress post
            sql = "SELECT wpid,eid,citedbycount,citesloaded FROM publications WHERE wpid > 0"
            cursor.execute(sql)
            result = cursor.fetchall()
            print "Total number of publications is %d" % len(result)
            npubs = len(result)
            #print "Npubs = %d" % npubs

            # Count all publications
            #sql = "SELECT COUNT(id) FROM publications"
            #cursor.execute(sql)
            #result = cursor.fetchall()
            #if len(result) > 0:
            #    print result[0]['COUNT(id)']

            # Read all publications that have no wordpress post yet
            sql = "SELECT scopusid,categories FROM publications WHERE wpid IS NULL"
            cursor.execute(sql)
            result = cursor.fetchall()
            if len(result) > 0:
                print "Number of new publications is %d" % len(result)
                nnewpubs = len(result)
            else:
                print "Nothing new found"

            # Retrieve all information required for the wordpress page
            for pub in result:
                print "Processing " + pub['scopusid'] + " categories " + str(pub['categories'])

                data = get_scopus_data(pub['scopusid'])
                #print json.dumps(data, sort_keys=True, indent=4, separators=(',', ': '))

                # Parse categories
                catlist = []
                try:
                    catlist = json.loads(pub['categories'])
                except TypeError:
                    print("No categories specified")

                wpid = wordpress_post_by_scopus(data, catlist)
                #print wpid
                #print pub['scopusid']

                # Warning: the resulting string uses double quotes (") so use
                # single quotes (') for the sql command
                datastr = json.dumps(data)
                #print datastr

                # Update publication database !!!
                with connection.cursor() as cursor:
                    #sql = "UPDATE publications SET wpid=" + str(wpid) + ",scopusdata='" + datastr + "' WHERE scopusid = '" + pub['scopusid'] + "'"
                    sql = "UPDATE publications SET wpid=" + str(wpid) + " WHERE scopusid = '" + pub['scopusid'] + "'"
                    cursor.execute(sql)
                    connection.commit()
    finally:
        connection.close()
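
# Sketch for the Todo "save the full scopus data in the database": fill the
# scopusdata column for all publications where it is still empty. The column
# name is taken from the commented-out UPDATE in update_wp_posts() above and
# is assumed to exist in the publications table; a parameterised query avoids
# the quoting problem noted there. Not called from the main flow below.
def backfill_scopusdata():
    """ Store the full scopus record for all publications that do not have one yet """
    connection = pymysql.connect(host=db_host, user=db_user, password=db_pw, db=db_name,
                                 charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)
    try:
        with connection.cursor() as cursor:
            # Publications without stored scopus data
            sql = "SELECT scopusid FROM publications WHERE scopusdata IS NULL"
            cursor.execute(sql)
            result = cursor.fetchall()
            print "Publications without stored scopus data: %d" % len(result)

            for pub in result:
                # Request the full record from scopus and store it as JSON
                data = get_scopus_data(pub['scopusid'])
                sql = "UPDATE publications SET scopusdata = %s WHERE scopusid = %s"
                cursor.execute(sql, (json.dumps(data), pub['scopusid']))
                connection.commit()
    finally:
        connection.close()
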
def update_wp_comments():
    """ Create a new comment for each newly found citation """
    global ncites
    global nnewcites

    print ""
    print "=== Create comments for newly registered citations in scopus"

    # Connect to the database
    connection = pymysql.connect(host=db_host, user=db_user, password=db_pw, db=db_name,
                                 charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)

    # Todo: Shift to a separate script !?
    try:
        with connection.cursor() as cursor:
            # Count all citations
            sql = "SELECT COUNT(id) FROM citations"
            cursor.execute(sql)
            result = cursor.fetchall()
            if len(result) > 0:
                ncites = result[0]['COUNT(id)']
                #print result[0]['COUNT(id)']

            # Read all citations that have no wordpress comment yet
            sql = "SELECT id,wpid,scopusdata FROM citations WHERE wpcommentid IS NULL"
            cursor.execute(sql)
            result = cursor.fetchall()
            print "Number of new citations is %d" % len(result)
            nnewcites = len(result)

            for pub in result:
                wpid = int(pub['wpid'])
                print "Processing post " + str(wpid)

                data = []
                try:
                    data = json.loads(pub['scopusdata'])
                except TypeError:
                    print("Scopus data missing?!")

                # If the creation of the comment fails, the wpcommentid 0 is
                # written to the database. This means, there is no second try
                # to get this citation added.
                # All failed comments can be found by searching for wpcommentid = 0
                wpcommentid = 0
                try:
                    wpcommentid = wordpress_comment_by_scopus(wpid, data)
                except:
                    print "Error: Submission of comment failed"

                sql = "UPDATE citations SET wpcommentid=" + str(wpcommentid) + " WHERE id = '" + str(pub['id']) + "'"
                cursor.execute(sql)
                connection.commit()
    finally:
        connection.close()


# Main
# Prevent sphinx from execution
if __name__ == "__main__":

    start = datetime.datetime.now()

    print ""
    print "***********************************************"
    print "**** scopus-get-publications / " + start.strftime("%Y-%m-%d") + " *****"
    print "***********************************************"
    print ""

    # Update publication database; search for new publications
    # Loop over all author groups defined in the configuration (sc_workgroups)
    # Todo: Detect, if there is no access to scopus !!!
    # search_param = '(PUBYEAR AFT %d)' % (sc_start)
    for wp in sc_workgroups:
        update_publications(wp['authors'], wp['name'], search_param)

    update_wp_posts()

    # Read all citations
    # Todo: read only new citations?!
    if sc_citations:
        update_citations()

        # Loop over all cites and post comments to wordpress,
        # when necessary update the database
        update_wp_comments()

    # Display summary
    end = datetime.datetime.now()

    print ""
    print "Summary: (see also logfile %s) " % log_file
    print "Date      = " + str(start)
    print "NPubs     = " + str(npubs)
    print "NNewPubs  = " + str(nnewpubs)
    if sc_citations:
        print "NCites    = " + str(ncites)
        print "NNewCites = " + str(nnewcites)
    print "Runtime   = " + str(end - start)

    # Write summary to log file
    if not os.path.isfile(log_file):
        print "Create logfile " + log_file

        # Open file and write header
        log = open(log_file, "w")
        log.write(__file__ + "\n")
        log.write("\n")
        log.write("      Date\t    Time\tNPubs\tNNewP\tNCite\tNNewC\t TRun\n")
        log.write("------------------------------------------------------------------------------\n")
        log.close()

    log = open(log_file, "a")
    if sc_citations:
        log.write("%s\t%s\t%5d\t%5d\t%5d\t%5d\t%s\n" % (start.strftime("%Y-%m-%d"),
                  start.strftime("%H:%M:%S"), npubs, nnewpubs, ncites, nnewcites, str(end - start)))
    else:
        log.write("%s\t%s\t%5d\t%5d\t%5s\t%5s\t%s\n" % (start.strftime("%Y-%m-%d"),
                  start.strftime("%H:%M:%S"), npubs, nnewpubs, "", "", str(end - start)))
    log.close()

    # done
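
# For reference: the names this script expects from config.py (pulled in via
# "from config import *" at the top). The values shown are placeholders only,
# not the real configuration.
#
#   db_host       = "localhost"                 # mysql connection
#   db_user       = "scopus"
#   db_pw         = "..."
#   db_name       = "scopus"
#
#   sc_workgroups = [{'name': "UFO", 'authors': ["<scopus author id>", "..."]}]
#   sc_citations  = True                        # also load citations and post comments
#   search_param  = "(PUBYEAR AFT 2014)"        # scopus search options for get_scopus_list
#
#   log_file      = "scopus-get-publications.log"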