# Get new publications # Publication and citations retrieval # A. Kopmann, 12.4.17 (ak) # # Scope: # Synchronize publication and citation database with # the actual state of posts in wordpress. # # Post might be deleted, if not fitting to the # scope of the website - mark these posts by wpid=0 # in publication database and drop all citations # import datetime import requests import json import os.path from ak_wordpress import wordpress_post_by_scopus, wordpress_comment_by_scopus, wordpress_get_post # Mysql persistent data (Accout: scopus, $scopus$) import pymysql.cursors import pymysql from config import * # Summary npubs = 0 # Check if all WP post stored in the database are still existing # Todo: # - Download full article description # - Update categories for author groups in wordpress # def update_database(): global npubs print "=== Check validity of publications database " # Connect to the database connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor) # Save all publication to the publication database try: with connection.cursor() as cursor: sql = "SELECT wpid FROM `publications` WHERE wpid > 0" cursor.execute(sql) result = cursor.fetchall() for pub in result: id = int(pub['wpid']) if not wordpress_get_post(id): print "Marking post %d as ununsed; deleting citations" % id sql = "DELETE FROM `citations` WHERE `wpid` = %s" cursor.execute(sql, (id) ) sql = "UPDATE `publications` SET `wpid` = 0 WHERE `wpid` = %s" cursor.execute(sql, (id) ) npubs += 1 # connection is not autocommit by default. So you must commit to save # your changes. connection.commit() finally: connection.close() # Main start = datetime.datetime.now() print "" print "***********************************************" print "**** scopus-update-database / " + start.strftime("%Y-%m-%d") + " *****" print "***********************************************" print "" # Update database update_database() # Display summary end = datetime.datetime.now() print "" print "Summary: " print "Date = " + str(start) print "NPubs removed = " + str(npubs) print "Runtime = " + str(end - start) print