scopus-update-database.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. # Get new publications
  2. # Publication and citations retrieval
  3. # A. Kopmann, 12.4.17 (ak)
  4. #
  5. # Scope:
  6. # Synchronize publication and citation database with
  7. # the actual state of posts in wordpress.
  8. #
# Posts that do not fit the scope of the website might have been
# deleted in WordPress - mark these posts with wpid=0 in the
# publication database and drop all of their citations.
  12. #
  13. import datetime
  14. import requests
  15. import json
  16. import os.path
  17. from ak_wordpress import wordpress_post_by_scopus, wordpress_comment_by_scopus, wordpress_get_post
# MySQL persistent data (Account: scopus, $scopus$)
  19. import pymysql.cursors
  20. import pymysql
  21. from config import *
# Summary counter: number of publications whose wpid was reset to 0 in
# this run. Incremented by update_database() and printed in the summary
# at the end of the script.
npubs = 0
# Check whether all WP posts stored in the database still exist
  25. # Todo:
  26. # - Download full article description
  27. # - Update categories for author groups in wordpress
  28. #
  29. def update_database():
  30. global npubs
  31. print "=== Check validity of publications database "
  32. # Connect to the database
  33. connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
  34. # Save all publication to the publication database
  35. try:
  36. with connection.cursor() as cursor:
  37. sql = "SELECT wpid FROM `publications` WHERE wpid > 0"
  38. cursor.execute(sql)
  39. result = cursor.fetchall()
  40. for pub in result:
  41. id = int(pub['wpid'])
  42. if (id > 0):
  43. if not wordpress_get_post(id):
  44. print "Marking post %d as ununsed; deleting citations" % id
  45. sql = "DELETE FROM `citations` WHERE `wpid` = %s"
  46. cursor.execute(sql, (id) )
  47. sql = "UPDATE `publications` SET `wpid` = 0 WHERE `wpid` = %s"
  48. cursor.execute(sql, (id) )
  49. npubs += 1
  50. # connection is not autocommit by default. So you must commit to save
  51. # your changes.
  52. connection.commit()
  53. finally:
  54. connection.close()
  55. # Main
  56. start = datetime.datetime.now()
  57. print ""
  58. print "***********************************************"
  59. print "**** scopus-update-database / " + start.strftime("%Y-%m-%d") + " *****"
  60. print "***********************************************"
  61. print ""
  62. # Update database
  63. update_database()
  64. # Display summary
  65. end = datetime.datetime.now()
  66. print ""
  67. print "Summary: "
  68. print "Date = " + str(start)
  69. print "NPubs removed = " + str(npubs)
  70. print "Runtime = " + str(end - start)
  71. print