# scopus-update-database.py
  1. # Get new publications
  2. # Publication and citations retrieval
  3. # A. Kopmann, 12.4.17 (ak)
  4. #
# Scope:
# Synchronize the publication and citation database with
# the actual state of the posts in WordPress.
#
# Posts may have been deleted from WordPress because they do not
# fit the scope of the website. Mark such posts with wpid=0
# in the publication database and drop all of their citations.
#
  13. import datetime
  14. import requests
  15. import json
  16. import os.path
  17. from ak_wordpress import wordpress_post_by_scopus, wordpress_comment_by_scopus, wordpress_get_post
# MySQL persistent data (account: scopus, $scopus$)
  19. import pymysql.cursors
  20. import pymysql
  21. db_host = 'localhost'
  22. db_user = 'scopus'
  23. db_pw = '$scopus$'
  24. db_name = 'scopus'
  25. # Summary
  26. npubs = 0
# Check whether all WordPress posts stored in the database still exist
  28. # Todo:
  29. # - Download full article description
  30. # - Update categories for author groups in wordpress
  31. #
  32. def update_database():
  33. print "=== Check validity of publications database "
  34. # Connect to the database
  35. connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
  36. # Save all publication to the publication database
  37. try:
  38. with connection.cursor() as cursor:
  39. sql = "SELECT wpid FROM `publications`"
  40. cursor.execute(sql)
  41. result = cursor.fetchall()
  42. for pub in result:
  43. id = int(pub['wpid'])
  44. if (id > 0):
  45. if not wordpress_get_post(id):
  46. print "Marking post %d as ununsed; deleting citations" % id
  47. sql = "DELETE FROM `citations` WHERE `wpid` = %s"
  48. cursor.execute(sql, (id) )
  49. sql = "UPDATE `publications` SET `wpid` = 0 WHERE `wpid` = %s"
  50. cursor.execute(sql, (id) )
  51. npubs += 1
  52. # connection is not autocommit by default. So you must commit to save
  53. # your changes.
  54. connection.commit()
  55. finally:
  56. connection.close()
  57. # Main
  58. start = datetime.datetime.now()
  59. print ""
  60. print "***********************************************"
  61. print "**** scopus-update-database / " + start.strftime("%Y-%m-%d") + " *****"
  62. print "***********************************************"
  63. print ""
  64. # Update database
  65. update_database()
  66. # Display summary
  67. end = datetime.datetime.now()
  68. print ""
  69. print "Summary: "
  70. print "Date = " + str(start)
  71. print "NPubs removed = " + str(npubs)
  72. print "Runtime = " + str(end - start)
  73. print