123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103 |
- # Get new publications
- # Publication and citations retrieval
- # A. Kopmann, 12.4.17 (ak)
- #
- # Scope:
- # Synchronize publication and citation database with
- # the actual state of posts in wordpress.
- #
- # Posts might be deleted if they do not fit the
- # scope of the website - mark these posts with wpid=0
- # in the publication database and drop all their citations
- #
- import datetime
- import requests
- import json
- import os.path
- from ak_wordpress import wordpress_post_by_scopus, wordpress_comment_by_scopus, wordpress_get_post
- # MySQL persistent data (Account: scopus, $scopus$)
- import pymysql.cursors
- import pymysql
- from config import *
# Summary
# Number of publications whose WordPress post has vanished during this run;
# incremented by update_database() and reported by the main script.
npubs = 0


# Check that all WP posts stored in the database still exist
# Todo:
# - Download full article description
# - Update categories for author groups in wordpress
#
def update_database():
    """Synchronize the publications database with the posts in WordPress.

    Reads every row of `publications` with wpid > 0 and asks
    wordpress_get_post() for the corresponding post.  When that call
    returns a falsy value, all rows in `citations` for that wpid are
    deleted and the publication is marked by setting its wpid to 0.

    Side effects:
        Increments the module-level counter ``npubs`` once per
        publication that gets marked.
        Prints a progress line for each marked publication.

    The database connection is always closed, even if an error occurs.
    """
    global npubs

    print("=== Check validity of publications database ")

    # Connect to the database (connection settings come from config)
    connection = pymysql.connect(host=db_host,
                                 user=db_user,
                                 password=db_pw,
                                 db=db_name,
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT wpid FROM `publications` WHERE wpid > 0")
            # fetchall() materializes the result, so the same cursor can be
            # reused for the DELETE/UPDATE statements inside the loop.
            publications = cursor.fetchall()

            for pub in publications:
                wpid = int(pub['wpid'])

                # NOTE(review): any falsy return is treated as "post was
                # deleted" - confirm that wordpress_get_post() does not also
                # return a falsy value on transient errors (e.g. network).
                if wordpress_get_post(wpid):
                    continue

                print("Marking post %d as ununsed; deleting citations" % wpid)

                # Parameters are passed as a real one-element tuple.
                cursor.execute(
                    "DELETE FROM `citations` WHERE `wpid` = %s", (wpid,))
                cursor.execute(
                    "UPDATE `publications` SET `wpid` = 0 WHERE `wpid` = %s",
                    (wpid,))
                npubs += 1

        # connection is not autocommit by default. So you must commit to save
        # your changes.
        connection.commit()
    finally:
        connection.close()
# Main
# Script body: print a banner, run the database check and print a summary.
# Single-argument print(...) calls are used so the script produces the same
# output under Python 2 and Python 3.
start = datetime.datetime.now()
print("")
print("***********************************************")
print("**** scopus-update-database / " + start.strftime("%Y-%m-%d") + " *****")
print("***********************************************")
print("")

# Update database
update_database()

# Display summary
end = datetime.datetime.now()
print("")
print("Summary: ")
print("Date = " + str(start))
# npubs is the module-level counter incremented by update_database()
print("NPubs removed = " + str(npubs))
print("Runtime = " + str(end - start))
# Trailing blank line (a bare `print` would be a no-op on Python 3)
print("")
|