|
@@ -0,0 +1,413 @@
|
|
|
+# Get new publications
|
|
|
+# Publication and citations retrieval
|
|
|
+# A. Kopmann, 6.2.17 (ak)
|
|
|
+#
|
|
|
+# Scope:
|
|
|
+# Publications are added once to wordpress as a post or comment.
|
|
|
+# Afterwards scopus will not change or modify anything any more !!!
|
|
|
+# Update is completely in the responsibility of the ufo users
|
|
|
+#
|
|
|
+
|
|
|
+# Todo:
|
|
|
+# - add mail to author button
|
|
|
+# - save full scopus data in the database
|
|
|
+#
|
|
|
+
|
|
|
+# Configuration - Scopus
|
|
|
+
|
|
|
+import datetime
|
|
|
+import requests
|
|
|
+import json
|
|
|
+import os.path
|
|
|
+
|
|
|
+from my_scopus import MY_API_KEY
|
|
|
+from my_scopus import ak, csa, pdv, ufo, ufo_pdv, ufo_ips, ufo_eps, ufo_apps
|
|
|
+from ak_scopus import get_scopus_list, get_scopus_data, get_scopus_refs
|
|
|
+
|
|
|
+
|
|
|
+from ak_wordpress import wordpress_post_by_scopus, wordpress_comment_by_scopus
|
|
|
+
|
|
|
+
|
|
|
+# Mysql persistent data (Account: scopus, $scopus$)
|
|
|
+import pymysql.cursors
|
|
|
+import pymysql
|
|
|
+
|
|
|
# Database connection settings.
# Each value can be overridden through an environment variable so that the
# credentials do not have to live in the source; the defaults are the values
# this script has always used. (`os` is bound by the file's `import os.path`.)
db_host = os.environ.get('SCOPUS_DB_HOST', 'localhost')
db_user = os.environ.get('SCOPUS_DB_USER', 'scopus')
db_pw = os.environ.get('SCOPUS_DB_PW', '$scopus$')
db_name = os.environ.get('SCOPUS_DB_NAME', 'scopus')

# One summary line per run is appended to this file by the main section.
log_file = os.environ.get('SCOPUS_LOG_FILE',
                          '/Users/kopmann/scopus-publications.log')


# Summary counters; the update_* functions fill them in via `global`.
npubs = 0       # publications found in the database
nnewpubs = 0    # publications without a wordpress post
ncites = 0      # citations found in the database
nnewcites = 0   # citations without a wordpress comment
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def update_publications(authids, authname='', scopus_opts='', max=0):
    """Store the Scopus publications of an author group in the database.

    Queries Scopus once for all authors in `authids`, inserts every
    publication that is not yet known into the `publications` table,
    refreshes its `citedbycount`, and, when `authname` is non-empty, adds
    `authname` to the JSON-encoded category list of the publication.

    Parameters:
        authids     -- author ids, passed unchanged to get_scopus_list()
        authname    -- name of the author group; used as category name.
                       An empty string disables the category update.
        scopus_opts -- additional query options, passed to get_scopus_list()
        max         -- passed to get_scopus_list() as third argument
                       (presumably a limit on the number of results;
                       TODO confirm in ak_scopus)

    All changes are committed together after the last publication has been
    processed; the connection is closed in every case.
    """
    print("=== Update of publications for the author group: " + authname)

    # Connect to the database
    connection = pymysql.connect(host=db_host, user=db_user, password=db_pw,
                                 db=db_name, charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    try:
        # Request all publications of a list of authors (in one query).
        # Result: list of records with (scopus id, eid, citedbycount).
        # The Scopus request lives inside the try block so that the
        # connection is also closed when the request fails.
        publist = get_scopus_list(authids, scopus_opts, max)
        print("Total number of publications: %d" % len(publist))

        with connection.cursor() as cursor:
            for pub in publist:
                scopusid = pub[0]
                eid = pub[1]
                citedbycount = pub[2]

                # 1 / Create a new record (no-op if it exists already) and
                #     refresh the citation count
                cursor.execute(
                    "INSERT IGNORE INTO `publications` (`scopusid`,`eid`) VALUES (%s,%s)",
                    (scopusid, eid))
                cursor.execute(
                    "UPDATE `publications` SET `citedbycount` = %s WHERE `scopusid` = %s",
                    (citedbycount, scopusid))

                # 2 / Add categories
                if len(authname) > 0:
                    cursor.execute(
                        "SELECT categories FROM publications WHERE scopusid = %s",
                        (scopusid,))
                    rows = cursor.fetchall()

                    catlist = []
                    if rows:
                        try:
                            catlist = json.loads(rows[0]['categories'])
                        except TypeError:
                            # Column is NULL: nothing stored so far
                            print("No categories upto now")
                        except ValueError:
                            # Stored text is not valid JSON: start over
                            # instead of aborting the whole update
                            print("Ignoring malformed categories of " + str(scopusid))

                    # Valid JSON that is not a list (e.g. null) cannot be
                    # extended; treat it like "no categories"
                    if not isinstance(catlist, list):
                        catlist = []

                    if authname not in catlist:
                        catlist.append(authname)

                    cursor.execute(
                        "UPDATE `publications` SET `categories` = %s WHERE `scopusid` = %s",
                        (json.dumps(catlist), scopusid))

        # connection is not autocommit by default. So you must commit to save
        # your changes.
        connection.commit()

    finally:
        connection.close()
|
|
|
+
|
|
|
+
|
|
|
+# Read all citations and store in the citation table
|
|
|
def update_citations():
    """Load new citations from Scopus into the `citations` table.

    Reads all rows of the `publications` table and stores their number in
    the global `npubs`. For every publication that has an eid and whose
    `citedbycount` is larger than `citesloaded`, the citing documents are
    requested with get_scopus_refs() and inserted into `citations`
    (existing rows are kept, INSERT IGNORE). Afterwards `citesloaded` is
    raised to the number of documents returned.

    Publications without a wordpress post id (`wpid` is NULL) are skipped,
    because citations are stored relative to the post id.
    The connection is closed in every case.
    """
    global npubs

    print("")
    print("=== Update citatation of all publication in the database")

    # Connect to the database
    connection = pymysql.connect(host=db_host, user=db_user, password=db_pw,
                                 db=db_name, charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    # Loop over the publications and read all citations from scopus
    # Todo: Shift to a separate script !?
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT wpid,eid,citedbycount,citesloaded FROM publications")
            publications = cursor.fetchall()

            print("Total number of publications is %d" % len(publications))
            npubs = len(publications)

            for pub in publications:
                if pub['wpid'] is None:
                    # int(None) would abort the whole run; without a post
                    # there is nothing the citations could be attached to
                    print("Skipping %s: no wordpress post yet" % pub['eid'])
                    continue
                wpid = int(pub['wpid'])

                # NULL counters count as zero
                if pub['citedbycount'] is None:
                    citedbycount = 0
                else:
                    citedbycount = int(pub['citedbycount'])
                if pub['citesloaded'] is None:
                    citesloaded = 0
                else:
                    citesloaded = int(pub['citesloaded'])

                # Only ask scopus if it reports more citations than loaded
                if not pub['eid'] or citedbycount <= citesloaded:
                    continue

                print("Processing %d = %s previously cited by %d" % (wpid, pub['eid'], citesloaded))

                # read list of citations
                data = get_scopus_refs(pub['eid'])
                n = len(data)

                for cite in data:
                    try:
                        citestr = json.dumps(cite)
                    except TypeError:
                        # Do not store a stale or undefined string for
                        # this entry; continue with the next one
                        print("Error serializing pub entry")
                        continue

                    # save the complete scopus data of the citing article
                    cursor.execute(
                        "INSERT IGNORE INTO `citations` (`wpid`,`scopusid`,`eid`,`scopusdata`) VALUES (%s,%s,%s,%s)",
                        (wpid, cite['dc:identifier'], cite['eid'], citestr))

                if n > 0:
                    connection.commit()

                # Update the number of cites for this article
                if n > citesloaded:
                    print("New citations found %d -> %d" % (citesloaded, n))
                    cursor.execute(
                        "UPDATE `publications` SET `citesloaded` = %s WHERE `wpid` = %s",
                        (n, wpid))
                    connection.commit()

    finally:
        connection.close()
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+# Create wordpress posts for all entries that have none
|
|
|
def update_wp_posts():
    """Create a wordpress post for every publication that has none.

    Selects all rows of `publications` whose `wpid` is NULL and stores
    their number in the global `nnewpubs`. For each of them the scopus
    record is requested with get_scopus_data(), a post is created with
    wordpress_post_by_scopus() and the returned post id is written back to
    the `wpid` column (committed per publication).

    The connection is closed in every case.
    """
    global nnewpubs

    print("")
    print("=== Create posts for newly registered publication in scopus")

    # Connect to the database
    connection = pymysql.connect(host=db_host, user=db_user, password=db_pw,
                                 db=db_name, charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    # Todo: Shift to a separate script !?
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT scopusid,categories FROM publications WHERE wpid IS NULL")
            new_pubs = cursor.fetchall()

        # Always store the count, so an empty result resets it to zero
        nnewpubs = len(new_pubs)
        if nnewpubs > 0:
            print("Number of new publications is %d" % nnewpubs)
        else:
            print("Nothing new found")

        # Retrieve all information required for the wordpress page
        for pub in new_pubs:
            # %s formatting: categories may be NULL, which cannot be
            # concatenated to a string
            print("Processing %s categories %s" % (pub['scopusid'], pub['categories']))

            data = get_scopus_data(pub['scopusid'])

            # Parse categories
            catlist = []
            try:
                catlist = json.loads(pub['categories'])
            except TypeError:
                print("No categories specified")
            except ValueError:
                # Stored text is not valid JSON: post without categories
                print("Ignoring malformed categories of " + str(pub['scopusid']))

            wpid = wordpress_post_by_scopus(data, catlist)

            # Update publication database. The values are passed as query
            # parameters, so the driver takes care of quoting.
            with connection.cursor() as update_cursor:
                update_cursor.execute(
                    "UPDATE publications SET wpid = %s WHERE scopusid = %s",
                    (wpid, pub['scopusid']))
            connection.commit()

    finally:
        connection.close()
|
|
|
+
|
|
|
+
|
|
|
def update_wp_comments():
    """Create a wordpress comment for every citation that has none.

    Stores the total number of rows in `citations` in the global `ncites`
    and the number of rows whose `wpcommentid` is NULL in the global
    `nnewcites`. For each of the latter a comment is submitted with
    wordpress_comment_by_scopus() and the returned comment id is written
    back to the `wpcommentid` column (committed per citation).

    If the creation of the comment fails, the wpcommentid 0 is written to
    the database. This means there is no second try to get this citation
    added. All failed comments can be found by searching for
    wpcommentid = 0.

    The connection is closed in every case.
    """
    global ncites
    global nnewcites

    print("")
    print("=== Create comments for newly registered citations in scopus")

    # Connect to the database
    connection = pymysql.connect(host=db_host, user=db_user, password=db_pw,
                                 db=db_name, charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    # Todo: Shift to a separate script !?
    try:
        with connection.cursor() as cursor:
            # Count all citations
            cursor.execute("SELECT COUNT(id) FROM citations")
            counted = cursor.fetchall()
            if len(counted) > 0:
                ncites = counted[0]['COUNT(id)']

            # Read all citations without a comment
            cursor.execute("SELECT id,wpid,scopusdata FROM citations WHERE wpcommentid IS NULL")
            new_cites = cursor.fetchall()
            print("Number of new citations is %d" % len(new_cites))
            nnewcites = len(new_cites)

            for cite in new_cites:
                wpid = int(cite['wpid'])
                print("Processing post " + str(wpid))

                data = []
                try:
                    data = json.loads(cite['scopusdata'])
                except TypeError:
                    print("Scopus data missing?!")
                except ValueError:
                    # Stored text is not valid JSON; handled like missing
                    # data instead of aborting the run
                    print("Scopus data malformed?!")

                wpcommentid = 0
                try:
                    wpcommentid = wordpress_comment_by_scopus(wpid, data)
                except Exception:
                    # Deliberately broad (any submission error marks the
                    # citation as failed), but Ctrl-C / SystemExit are no
                    # longer swallowed
                    print("Error: Submission of comment failed")

                cursor.execute(
                    "UPDATE citations SET wpcommentid = %s WHERE id = %s",
                    (wpcommentid, cite['id']))
                connection.commit()

    finally:
        connection.close()
|
|
|
+
|
|
|
+
|
|
|
+# Todo: Add a script to save the data for all publications in the database!!!
|
|
|
+# There was some problem before?!
|
|
|
+#
|
|
|
+
|
|
|
+# Todo: Add scripts to check consistency in the database
|
|
|
+# and fix problems if detected
|
|
|
+# E.g. search for wpcommentid == 0
|
|
|
+# Check if wp posts + comments are still available, display
|
|
|
+# deleted entries
|
|
|
+#
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+# Main
|
|
|
+
|
|
|
# Main
#
# Runs the complete update: publications -> wordpress posts -> citations ->
# wordpress comments, prints a summary and appends it to the log file.

start = datetime.datetime.now()

print("")
print("***********************************************")
print("**** scopus-get-publications / " + start.strftime("%Y-%m-%d") + " *****")
print("***********************************************")
print("")


# Update publication database; search for new publications
# One call per author group imported from my_scopus; the second argument is
# the category name used for the wordpress posts.
#
# Todo: Detect, if there is no access to scopus !!!
#
# For a quick test the number of results can be limited, e.g.
#   update_publications(ufo_pdv, "Computing", '(PUBYEAR AFT 2006)', 10)

update_publications(ufo_pdv, "Computing", '(PUBYEAR AFT 2006)')
update_publications(ufo_ips, "X-ray Imaging", '(PUBYEAR AFT 2010)')
update_publications(ufo_eps, "Electronics", '(PUBYEAR AFT 2010)')
update_publications(ufo_apps, "Morphology", '(PUBYEAR AFT 2010)')


# Create posts first: citations are stored relative to the wordpress post id
update_wp_posts()

# read all citations
# Todo: read only new citations?!
update_citations()

# loop over all cites and post comments to wordpress, when necessary
# update database
update_wp_comments()
# Todo: deactivate comments for scopus posts!!!


# Display summary
end = datetime.datetime.now()
print("")
print("Summary: (see also logfile %s) " % log_file)
print("Date = " + str(start))
print("NPubs = " + str(npubs))
print("NNewPubs = " + str(nnewpubs))
print("NCites = " + str(ncites))
print("NNewCites = " + str(nnewcites))
print("Runtime = " + str(end - start))


# Write summary to log file
if not os.path.isfile(log_file):
    print("Create logfile " + log_file)
    # Create the file and write the header; `with` closes the file even if
    # a write fails
    with open(log_file, "w") as log:
        log.write(__file__ + "\n")
        log.write("\n")
        log.write(" Date\t Time\tNPubs\tNNewP\tNCite\tNNewC\t TRun\n")
        log.write("------------------------------------------------------------------------------\n")

# Append one line per run
with open(log_file, "a") as log:
    log.write("%s\t%s\t%5d\t%5d\t%5d\t%5d\t%s\n" % (start.strftime("%Y-%m-%d"),
                                                    start.strftime("%H:%M:%S"),
                                                    npubs, nnewpubs, ncites, nnewcites,
                                                    str(end - start)))
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+# done
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|