scopus-update-database.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. # Get new publications
  2. # Publication and citations retrieval
  3. # A. Kopmann, 12.4.17 (ak)
  4. #
  5. # Scope:
  6. # Synchronize publication and citation database with
  7. # the actual state of posts in wordpress.
  8. #
  9. # Post might be deleted, if not fitting to the
  10. # scope of the website - mark these posts by wpid=0
  11. # in publication database and drop all citations
  12. #
  13. import datetime
  14. import requests
  15. import json
  16. import os.path
  17. from ak_wordpress import wordpress_post_by_scopus, wordpress_comment_by_scopus, wordpress_get_post
  18. # Mysql persistent data (Accout: scopus, $scopus$)
  19. import pymysql.cursors
  20. import pymysql
  21. from config import *
  22. # Summary
  23. npubs = 0
  24. # Check if all WP post stored in the database are still existing
  25. # Todo:
  26. # - Download full article description
  27. # - Update categories for author groups in wordpress
  28. #
  29. def update_database():
  30. global npubs
  31. print "=== Check validity of publications database "
  32. # Connect to the database
  33. connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
  34. # Save all publication to the publication database
  35. try:
  36. with connection.cursor() as cursor:
  37. sql = "SELECT wpid FROM `publications` WHERE wpid > 0"
  38. cursor.execute(sql)
  39. result = cursor.fetchall()
  40. for pub in result:
  41. id = int(pub['wpid'])
  42. if not wordpress_get_post(id):
  43. print "Marking post %d as ununsed; deleting citations" % id
  44. sql = "DELETE FROM `citations` WHERE `wpid` = %s"
  45. cursor.execute(sql, (id) )
  46. sql = "UPDATE `publications` SET `wpid` = 0 WHERE `wpid` = %s"
  47. cursor.execute(sql, (id) )
  48. npubs += 1
  49. # connection is not autocommit by default. So you must commit to save
  50. # your changes.
  51. connection.commit()
  52. finally:
  53. connection.close()
  54. # Main
  55. start = datetime.datetime.now()
  56. print ""
  57. print "***********************************************"
  58. print "**** scopus-update-database / " + start.strftime("%Y-%m-%d") + " *****"
  59. print "***********************************************"
  60. print ""
  61. # Update database
  62. update_database()
  63. # Display summary
  64. end = datetime.datetime.now()
  65. print ""
  66. print "Summary: "
  67. print "Date = " + str(start)
  68. print "NPubs removed = " + str(npubs)
  69. print "Runtime = " + str(end - start)
  70. print