123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413 |
- # Get new publications
- # Publication and citations retrieval
- # A. Kopmann, 6.2.17 (ak)
- #
- # Scope:
- # Publications are added to wordpress once, as a post or comment.
- # Afterwards scopus will not change or modify anything any more !!!
- # Updating is entirely the responsibility of the ufo users
- #
- # Todo:
- # - add mail to author button
- # - save full scopus data in the database
- #
- # Configuration - Scopus
- import datetime
- import requests
- import json
- import os.path
- from my_scopus import MY_API_KEY
- from my_scopus import ak, csa, pdv, ufo, ufo_pdv, ufo_ips, ufo_eps, ufo_apps
- from ak_scopus import get_scopus_list, get_scopus_data, get_scopus_refs
- from ak_wordpress import wordpress_post_by_scopus, wordpress_comment_by_scopus
- # Mysql persistent data (Account: scopus, $scopus$)
- import pymysql.cursors
- import pymysql
# Database and logging configuration.
# Every setting can be overridden through an environment variable, so that
# credentials and user-specific paths do not have to be edited in the
# source. The defaults are the previously hard-coded values.
db_host = os.environ.get('SCOPUS_DB_HOST', 'localhost')
db_user = os.environ.get('SCOPUS_DB_USER', 'scopus')
db_pw = os.environ.get('SCOPUS_DB_PASSWORD', '$scopus$')
db_name = os.environ.get('SCOPUS_DB_NAME', 'scopus')
log_file = os.environ.get('SCOPUS_LOG_FILE',
                          '/Users/kopmann/scopus-publications.log')

# Summary counters, set by the update_* functions and reported at the end
# of the run.
npubs = 0      # rows in the publications table
nnewpubs = 0   # publications that had no wordpress post (wpid IS NULL)
ncites = 0     # rows in the citations table
nnewcites = 0  # citations that had no wordpress comment
def update_publications(authids, authname='', scopus_opts='', max=0):
    """Register the publications of an author group in the database.

    Requests the publication list from get_scopus_list(), inserts every
    entry that is not yet present into the `publications` table, refreshes
    its `citedbycount` and, if *authname* is given, adds *authname* to the
    JSON-encoded `categories` list of the publication.

    All changes are committed together at the end; if anything fails
    nothing is committed and the connection is closed.

    Args:
        authids: author ids, passed unchanged to get_scopus_list().
        authname: name of the author group, stored as a category.
            An empty string skips the category handling.
        scopus_opts: extra query options passed unchanged to
            get_scopus_list(), e.g. '(PUBYEAR AFT 2010)'.
        max: passed unchanged to get_scopus_list(). The name shadows the
            builtin but is kept so keyword callers continue to work.
    """
    print("=== Update of publications for the author group: " + authname)

    connection = pymysql.connect(host=db_host, user=db_user, password=db_pw,
                                 db=db_name, charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)
    try:
        # The scopus request runs inside the try block so that the
        # connection is closed even when the request fails.
        # Each record is indexed as (scopus id, eid, citedbycount).
        publist = get_scopus_list(authids, scopus_opts, max)
        print("Total number of publications: %d" % len(publist))

        with connection.cursor() as cursor:
            for pub in publist:
                scopusid, eid, citedbycount = pub[0], pub[1], pub[2]

                # 1 / Create the record if it does not exist yet and
                # refresh the citation count.
                cursor.execute(
                    "INSERT IGNORE INTO `publications` (`scopusid`,`eid`) "
                    "VALUES (%s,%s)",
                    (scopusid, eid))
                cursor.execute(
                    "UPDATE `publications` SET `citedbycount` = %s "
                    "WHERE `scopusid` = %s",
                    (citedbycount, scopusid))

                # 2 / Add the author group to the categories.
                if not authname:
                    continue

                cursor.execute(
                    "SELECT categories FROM publications WHERE scopusid = %s",
                    (scopusid,))
                row = cursor.fetchone()

                catlist = []
                if row is not None:
                    try:
                        catlist = json.loads(row['categories'])
                    except (TypeError, ValueError):
                        # NULL (TypeError) or empty/corrupt JSON (ValueError)
                        print("No categories upto now")
                if not isinstance(catlist, list):
                    # Anything but a JSON list cannot be extended; start over.
                    catlist = []

                # Only write when the category list actually changed.
                if authname not in catlist:
                    catlist.append(authname)
                    cursor.execute(
                        "UPDATE `publications` SET `categories` = %s "
                        "WHERE `scopusid` = %s",
                        (json.dumps(catlist), scopusid))

        # connection is not autocommit by default, so commit to save
        # the changes.
        connection.commit()
    finally:
        connection.close()
- # Read all citations and store in the citation table
def update_citations():
    """Load new citations from scopus and store them in `citations`.

    Reads all rows of the `publications` table and, for every publication
    whose `citedbycount` is larger than `citesloaded`, requests the list
    of citing articles with get_scopus_refs(). Each citing article is
    inserted into the `citations` table (duplicates are ignored by
    INSERT IGNORE) together with its JSON-serialized scopus record.
    Afterwards `citesloaded` of the publication is raised to the number of
    citations received.

    Sets the module-level counter `npubs` to the number of publications
    in the database.
    """
    global npubs

    print("")
    print("=== Update citatation of all publication in the database")

    connection = pymysql.connect(host=db_host, user=db_user, password=db_pw,
                                 db=db_name, charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    # Loop over the publications and read all citations from scopus
    # Todo: Shift to a separate script !?
    try:
        with connection.cursor() as cursor:
            cursor.execute(
                "SELECT wpid,eid,citedbycount,citesloaded FROM publications")
            publications = cursor.fetchall()

            npubs = len(publications)
            print("Total number of publications is %d" % npubs)

            for pub in publications:
                # Citations are keyed by the wordpress post id, so a
                # publication without a post (wpid NULL) or without an eid
                # cannot be processed yet.
                if pub['wpid'] is None or not pub['eid']:
                    continue

                wpid = int(pub['wpid'])
                # NULL counters are treated as zero.
                citedbycount = int(pub['citedbycount'] or 0)
                citesloaded = int(pub['citesloaded'] or 0)
                if citedbycount <= citesloaded:
                    continue

                print("Processing %d = %s previously cited by %d"
                      % (wpid, pub['eid'], citesloaded))
                citations = get_scopus_refs(pub['eid'])
                n = len(citations)

                for cite in citations:
                    try:
                        citestr = json.dumps(cite)
                    except (TypeError, ValueError):
                        # Skip the entry instead of storing an undefined
                        # or stale string from a previous iteration.
                        print("Error serializing pub entry")
                        continue

                    # Save the complete scopus data of the citing article.
                    cursor.execute(
                        "INSERT IGNORE INTO `citations` "
                        "(`wpid`,`scopusid`,`eid`,`scopusdata`) "
                        "VALUES (%s,%s,%s,%s)",
                        (wpid, cite['dc:identifier'], cite['eid'], citestr))

                # Update the number of cites for this article
                if n > citesloaded:
                    print("New citations found %d -> %d" % (citesloaded, n))
                    cursor.execute(
                        "UPDATE `publications` SET `citesloaded` = %s "
                        "WHERE wpid = %s",
                        (n, wpid))

                # Commit per publication so that finished work survives a
                # failure on a later publication.
                connection.commit()
    finally:
        connection.close()
- # Create wordpress posts for all entries that have none
def update_wp_posts():
    """Create a wordpress post for every publication that has none.

    Selects all rows of `publications` with `wpid IS NULL`, fetches the
    article data with get_scopus_data(), creates the post with
    wordpress_post_by_scopus() and stores the returned post id in the
    `wpid` column.

    Sets the module-level counter `nnewpubs` when new publications are
    found; it is left untouched otherwise.
    """
    global nnewpubs

    print("")
    print("=== Create posts for newly registered publication in scopus")

    connection = pymysql.connect(host=db_host, user=db_user, password=db_pw,
                                 db=db_name, charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    # Todo: Shift to a separate script !?
    try:
        with connection.cursor() as cursor:
            cursor.execute(
                "SELECT scopusid,categories FROM publications "
                "WHERE wpid IS NULL")
            new_pubs = cursor.fetchall()

            if len(new_pubs) > 0:
                print("Number of new publications is %d" % len(new_pubs))
                nnewpubs = len(new_pubs)
            else:
                print("Nothing new found")

            for pub in new_pubs:
                # %s formatting instead of concatenation: categories may be
                # NULL (None), which must not abort the run.
                print("Processing %s categories %s"
                      % (pub['scopusid'], pub['categories']))

                # Retrieve all information required for the wordpress page
                data = get_scopus_data(pub['scopusid'])

                # Parse categories
                try:
                    catlist = json.loads(pub['categories'])
                except (TypeError, ValueError):
                    # NULL (TypeError) or empty/corrupt JSON (ValueError)
                    catlist = []
                    print("No categories specified")

                wpid = wordpress_post_by_scopus(data, catlist)

                # Parameterized query: the driver does the quoting, and a
                # None wpid is stored as NULL so the entry is picked up
                # again on the next run.
                cursor.execute(
                    "UPDATE publications SET wpid = %s WHERE scopusid = %s",
                    (wpid, pub['scopusid']))
                # Commit right away: the post already exists in wordpress,
                # so its id must not be lost if a later entry fails.
                connection.commit()
    finally:
        connection.close()
def update_wp_comments():
    """Create a wordpress comment for every citation that has none.

    Selects all rows of `citations` with `wpcommentid IS NULL`, decodes
    the stored scopus record and submits it with
    wordpress_comment_by_scopus(). The returned comment id is written to
    the `wpcommentid` column.

    If the creation of the comment fails, the wpcommentid 0 is written to
    the database. This means there is no second try to get this citation
    added. All failed comments can be found by searching for
    wpcommentid = 0.

    Sets the module-level counters `ncites` (total number of citations)
    and `nnewcites` (citations that had no comment yet).
    """
    global ncites
    global nnewcites

    print("")
    print("=== Create comments for newly registered citations in scopus")

    connection = pymysql.connect(host=db_host, user=db_user, password=db_pw,
                                 db=db_name, charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    # Todo: Shift to a separate script !?
    try:
        with connection.cursor() as cursor:
            # Count all citations
            cursor.execute("SELECT COUNT(id) FROM citations")
            counts = cursor.fetchall()
            if len(counts) > 0:
                ncites = counts[0]['COUNT(id)']

            cursor.execute(
                "SELECT id,wpid,scopusdata FROM citations "
                "WHERE wpcommentid IS NULL")
            new_cites = cursor.fetchall()
            print("Number of new citations is %d" % len(new_cites))
            nnewcites = len(new_cites)

            for cite in new_cites:
                wpid = int(cite['wpid'])
                print("Processing post " + str(wpid))

                data = []
                try:
                    data = json.loads(cite['scopusdata'])
                except (TypeError, ValueError):
                    # NULL (TypeError) or empty/corrupt JSON (ValueError)
                    print("Scopus data missing?!")

                wpcommentid = 0
                try:
                    wpcommentid = wordpress_comment_by_scopus(wpid, data)
                except Exception as exc:
                    # Best effort: record the failure as wpcommentid 0, but
                    # let KeyboardInterrupt/SystemExit propagate and show
                    # the reason for the failure.
                    print("Error: Submission of comment failed")
                    print("  %r" % (exc,))

                cursor.execute(
                    "UPDATE citations SET wpcommentid = %s WHERE id = %s",
                    (wpcommentid, cite['id']))
                # Commit right away: the comment already exists in
                # wordpress, so its id must not be lost on a later failure.
                connection.commit()
    finally:
        connection.close()
- # Todo: Add a script to save the data for all publications in the database!!!
- # There was some problem before?!
- #
- # Todo: Add scripts to check consistency in the database
- # and fix problems if detected
- # E.g. search for wpcommentid == 0
- # Check if wp posts + comments are still available, display
- # deleted entries
- #
# Main
#
# Runs the complete update: register new publications of all author
# groups, create wordpress posts for them, load new citations, create
# wordpress comments for them, then print a summary and append it to the
# log file.
start = datetime.datetime.now()

print("")
print("***********************************************")
print("**** scopus-get-publications / " + start.strftime("%Y-%m-%d") + " *****")
print("***********************************************")
print("")

# Update publication database; search for new publications
# Loop over all user groups defined in my_scopus.py
# Todo: Detect, if there is no access to scopus !!!
#
# Define the authors that should be considered
#update_publications(ufo_pdv, "Computing", '(PUBYEAR AFT 2006)',10)
update_publications(ufo_pdv, "Computing", '(PUBYEAR AFT 2006)')
update_publications(ufo_ips, "X-ray Imaging", '(PUBYEAR AFT 2010)')
update_publications(ufo_eps, "Electronics", '(PUBYEAR AFT 2010)')
update_publications(ufo_apps, "Morphology", '(PUBYEAR AFT 2010)')

update_wp_posts()

# read all citations
# Todo: read only new citations?!
update_citations()

# loop over all cites and post comments to wordpress, when necessary
# update database
update_wp_comments()

# Todo: deactivate comments for scopus posts!!!

# Display summary
end = datetime.datetime.now()

print("")
print("Summary: (see also logfile %s) " % log_file)
print("Date = " + str(start))
print("NPubs = " + str(npubs))
print("NNewPubs = " + str(nnewpubs))
print("NCites = " + str(ncites))
print("NNewCites = " + str(nnewcites))
print("Runtime = " + str(end - start))

# Write summary to log file. The files are opened with "with" so that the
# handles are closed even if a write fails.
if not os.path.isfile(log_file):
    print("Create logfile " + log_file)
    # Create the file and write the header
    with open(log_file, "w") as log:
        log.write(__file__ + "\n")
        log.write("\n")
        log.write(" Date\t Time\tNPubs\tNNewP\tNCite\tNNewC\t TRun\n")
        log.write("------------------------------------------------------------------------------\n")

with open(log_file, "a") as log:
    log.write("%s\t%s\t%5d\t%5d\t%5d\t%5d\t%s\n" % (
        start.strftime("%Y-%m-%d"),
        start.strftime("%H:%M:%S"),
        npubs, nnewpubs, ncites, nnewcites, str(end - start)))
# done
|