Browse Source

Added filter for collaboration papers with too many authors

Andreas Kopmann 5 years ago
parent
commit
b804481a8c
7 changed files with 140 additions and 28 deletions
  1. 1 1
      ak_scopus.py
  2. 46 9
      ak_wordpress.py
  3. 5 2
      etc/config_held_de.py
  4. 13 0
      log/scopus-publications-held-de.log
  5. 33 0
      rm-scopusid.py
  6. 8 8
      test-scopus.py
  7. 34 8
      test-wp.py

+ 1 - 1
ak_scopus.py

@@ -114,7 +114,7 @@ def get_scopus_data(SCOPUS_ID):
     url = ("http://api.elsevier.com/content/abstract/scopus_id/"
            + SCOPUS_ID
            + "?field=article-number,title,publicationName,volume,issueIdentifier,"
-           + "prism:pageRange,coverDate,article-number,eid,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn")
+           + "prism:pageRange,coverDate,article-number,eid,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn,authkeywords")
     #print url
     resp = requests.get(url,
             headers={'Accept':'application/json',

+ 46 - 9
ak_wordpress.py

@@ -6,11 +6,13 @@
 
 from datetime import datetime
 import json
+from pprint import pprint
 
 from wordpress_xmlrpc import Client
 from wordpress_xmlrpc import WordPressPost, WordPressComment
 from wordpress_xmlrpc.methods.posts import GetPost, NewPost, EditPost
 from wordpress_xmlrpc.methods.comments import NewComment, EditComment
+from wordpress_xmlrpc.methods.taxonomies import GetTerms
 
 from config import *
 
@@ -18,6 +20,19 @@ from config import *
 wp = Client(wp_api_url, wp_user, wp_password) 
 
 
+#
+# Get category from slug name used in the configuration file
+#
+
+def wordpress_get_category(slug):
+     """ Return the category term whose slug equals `slug`, else None """
+     # Fetch the complete category taxonomy through the WordPress client
+     terms = wp.call(GetTerms('category'))
+
+     # First term with a matching slug; None when nothing matches
+     return next((term for term in terms if term.slug == slug), None)
+
+
 #
 # query post
 #
@@ -42,14 +57,19 @@ def wordpress_get_post(wpid):
 #
 def wordpress_post_by_scopus(data, category = []):
     """ Create a new post based on the Scopus information """
+
+    #print data['abstracts-retrieval-response']
  
     try:
         coredata = data['abstracts-retrieval-response']['coredata']
         authors = data['abstracts-retrieval-response']['authors']['author']
+
     except KeyError:
+        pprint(data)
+        print ""
         print "Have not found authors in dataset"
         print " -> Is the connection to scopus broken???"
-        exit()
+        return(0)
 
     # decode date
     tsstring = coredata['prism:coverDate'].encode('utf-8')
@@ -114,21 +134,38 @@ def wordpress_post_by_scopus(data, category = []):
 
     #print post.content
 
-    post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+    #post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
 
+    catlist = []
+    for slug in category:
+        cat = wordpress_get_category(slug)
+        catlist.append(cat)
+    post.terms = catlist
+
+    try:
+        taglist = []
+        for tag in data['abstracts-retrieval-response']['authkeywords']['author-keyword']:
+            print "Keyword: ", tag
+            taglist.append(tag['$'])
+    except:
+        pass
 
-    if category == '':
-        catlist = ['Publications']
-    else:
-        catlist = ['Publications'] + category
     post.terms_names = {
-            'category': catlist # defined in WP + python script
+            'category': ['Publications'],
+            'post_tag': taglist
         }
 
     # whoops, I forgot to publish it!
-    post.post_status = 'publish' # alternative is draft here !
+    if len(authors) > sc_max_authors:
+        post.post_status = 'draft' # check how to handle publication in wordpress
+        print "Too many authors %d - set to draft" % (len(authors))
+    else:
+        post.post_status = 'publish' # handled as a standard publication
     post.comment_status = 'closed' # allow comments - may be only for scopus
-    wp.call(EditPost(post.id, post))# Update the before created post
+
+    # Todo: this can fail! Add proper error handling 
+    post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+    #wp.call(EditPost(post.id, post))# Update the before created post
 
     # need to update the database !!!
     return post.id

+ 5 - 2
etc/config_held_de.py

@@ -115,7 +115,10 @@ matthiasKleifegs = "6602072426"
 
 
 # Definition of workgroups for automatic Scopus publication retrieval
-
+# Other parameters define query options. They control whether citations
+# or keywords should be used. The parameter max_authors defines the limit
+# used to identify collaboration papers.
+#
 sc_start = 2016
 sc_citations = False
 sc_keywords = True
@@ -148,7 +151,7 @@ sc_workgroups = [
 {'name':"computing",'authors':dts_wp23},
 {'name':"helmholtz-cube",'authors':dts_wp31},
 {'name':"gaseous-detectors",'authors':dts_wp32},
-{'name':"photon-detetors",'authors':dts_wp33},
+{'name':"photon-detectors",'authors':dts_wp33},
 {'name':"fast-timing",'authors':dts_wp34},
 {'name':"cmos-sensors",'authors':dts_wp35},
 ]

+ 13 - 0
log/scopus-publications-held-de.log

@@ -15,3 +15,16 @@
 2018-03-21	18:29:03	  553	    0	 3050	    0	0:00:26.512658
 2018-03-21	18:30:17	  553	    0	 3050	    0	0:00:09.612537
 2018-03-21	18:30:53	  554	    1	 3050	    0	0:00:50.147331
+2018-05-18	09:41:42	  561	    0	     	     	0:00:00.026463
+2018-05-18	09:42:11	  561	   14	     	     	0:02:33.279780
+2018-05-18	09:49:56	  575	    0	     	     	0:00:52.256674
+2018-05-18	10:42:33	   51	    2	     	     	0:00:23.904634
+2018-05-18	11:25:15	   53	  196	     	     	0:22:02.473417
+2018-05-18	14:10:34	  344	  119	     	     	0:07:25.366567
+2018-05-18	14:37:46	  460	    8	     	     	0:01:36.838421
+2018-05-18	15:36:54	  661	  110	     	     	0:07:38.430624
+2018-05-18	17:53:40	  771	  178	     	     	0:19:25.439346
+2018-05-18	20:25:32	  948	    0	     	     	0:03:00.877839
+2018-06-12	23:25:24	  948	   24	     	     	0:09:37.270052
+2018-06-12	23:39:40	  972	    0	     	     	0:01:19.301210
+2018-08-02	13:58:27	  972	   55	     	     	0:07:36.934371

+ 33 - 0
rm-scopusid.py

@@ -0,0 +1,33 @@
+# Remove scopus ID from publication database
+#
+
+import sys
+
+# Mysql persistent data (Account: scopus, $scopus$)
+import pymysql.cursors
+import pymysql
+
+from config import *
+
+
+if len(sys.argv) > 1:
+    # The Scopus ID to remove is taken from the first command-line argument
+    scopusid = sys.argv[1]
+
+    # Connect to the database
+    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
+
+    try:
+        with connection.cursor() as cursor:
+            # Bind the ID as a query parameter so the driver escapes it
+            sql = "DELETE FROM publications WHERE scopusid=%s"
+            print cursor.mogrify(sql, (scopusid,))
+            cursor.execute(sql, (scopusid,))
+            connection.commit()
+
+    finally:
+        connection.close()
+
+
+
+

+ 8 - 8
test-scopus.py

@@ -191,11 +191,11 @@ def get_scopus_brief(SCOPUS_ID, max_authors=1000):
 # List of newly cited publications
 #
 
+print get_scopus_info("SCOPUS_ID:84969498463")
 
-
-resp = requests.get("http://api.elsevier.com/content/author?author_id="+ak+"&view=metrics",
-            headers={'Accept':'application/json',
-                             'X-ELS-APIKey': MY_API_KEY})
+#resp = requests.get("http://api.elsevier.com/content/author?author_id="+ak+"&view=metrics",
+#            headers={'Accept':'application/json',
+#                             'X-ELS-APIKey': MY_API_KEY})
 
 #print resp
 
@@ -215,7 +215,7 @@ resp = requests.get("http://api.elsevier.com/content/author?author_id="+ak+"&vie
 #publist = get_scopus_list(ufo_ips, 'PUBYEAR = 2015', 30)
 
 # Exclude authors?
-publist = get_scopus_list(pdv, 'NOT AU-ID(7006284555)', 10)
+#publist = get_scopus_list(pdv, 'NOT AU-ID(7006284555)', 10)
 
 
 # Author ausschliessen - black list !!!
@@ -224,10 +224,10 @@ publist = get_scopus_list(pdv, 'NOT AU-ID(7006284555)', 10)
 #
 # Display the result
 #
-print "Number of publications: %d" % len(publist)
+#print "Number of publications: %d" % len(publist)
 
-for pub in publist:
-    print get_scopus_brief(pub,10000)
+#for pub in publist:
+#    print get_scopus_brief(pub,10000)
 
 
 # Test printing functions

+ 34 - 8
test-wp.py

@@ -9,33 +9,59 @@ from wordpress_xmlrpc import WordPressPost, WordPressComment
 from wordpress_xmlrpc.methods.posts import GetPosts, NewPost, EditPost
 from wordpress_xmlrpc.methods.comments import NewComment, EditComment
 from wordpress_xmlrpc.methods.users import GetUserInfo
+from wordpress_xmlrpc.methods.taxonomies import GetTerms
 
 from config import *
 
+def wordpress_get_category(slug):
+     # Look up a category term by its slug; gives None if no term matches
+     matches = [term for term in wp.call(GetTerms('category'))
+                if term.slug == slug]
+
+     # Only the first hit is of interest
+     return matches[0] if matches else None
+
+
+
 # Use Wordpress account - not the mysql credentials
 # Todo: use scopus later !!!
 wp = Client(wp_api_url,wp_user,wp_password)
-print wp.call(GetPosts())
+#print wp.call(GetPosts())
 
 #print wp.call(GetUserInfo())
 
+# Test access to categories
+categories = []
+cat = wordpress_get_category("sensors")
+
+if cat: 
+    print "Found ", cat.name
+    categories.append(cat)
+
+
 # Todo: Set the date of the post according to the scopus date
 
 post = WordPressPost()
 post.title = 'My post 7' # put title of the publication here
 post.slug = 'DOIxxxxx7' # set the name of the post different to the title
 post.content = 'This is a more complete example post about XML-RPC (but still not comlete enough)'
-post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+#post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+
+post.terms = categories
 
 post.terms_names = {
-    # 'post_tag': ['test', 'firstpost'], # what's that? I don't use it currently
-    'category': ['Publications', 'Reports'] # defined in WP + python script
+    #'post_tag': ['test', 'firstpost'], # what's that? I don't use it currently
+    'category': ['Publications', 'asics'] # defined in WP + python script
 }
 
 # whoops, I forgot to publish it!
 post.post_status = 'publish' # alternative is draft here !
 post.comment_status = 'open' # allow comments - may be only for scopus
-wp.call(EditPost(post.id, post))# Update the before created post
+post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+
+#wp.call(EditPost(post.id, post))# Update the before created post
+
+print "Created Wordpress post ", post.id
 
 # Todo:
 # Save the id in the publication table, together with the scopus id
@@ -50,10 +76,10 @@ wp.call(EditPost(post.id, post))# Update the before created post
 # or my biotech items???
 #
 
-comment = WordPressComment()
-comment.content = 'Hi, thats cool - we can also add our comments automatically'
+#comment = WordPressComment()
+#comment.content = 'Hi, thats cool - we can also add our comments automatically'
 
-comment.id = wp.call(NewComment(post.id, comment))
+#comment.id = wp.call(NewComment(post.id, comment))