Browse Source

Merge branch 'master' of https://fuzzy.fzk.de/gogs/kopmann/scopus

Andreas Kopmann 5 years ago
parent
commit
ffdd94225f
9 changed files with 305 additions and 41 deletions
  1. 1 1
      ak_scopus.py
  2. 47 9
      ak_wordpress.py
  3. 21 14
      etc/config_held_de.py
  4. 147 0
      etc/config_ufo_kit_edu_ipe.py
  5. 13 0
      log/scopus-publications-held-de.log
  6. 33 0
      rm-scopusid.py
  7. 8 8
      test-scopus.py
  8. 33 7
      test-wp.py
  9. 2 2
      test-wp2.py

+ 1 - 1
ak_scopus.py

@@ -114,7 +114,7 @@ def get_scopus_data(SCOPUS_ID):
     url = ("http://api.elsevier.com/content/abstract/scopus_id/"
            + SCOPUS_ID
            + "?field=article-number,title,publicationName,volume,issueIdentifier,"
-           + "prism:pageRange,coverDate,article-number,eid,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn")
+           + "prism:pageRange,coverDate,article-number,eid,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn,authkeywords")
     #print url
     resp = requests.get(url,
             headers={'Accept':'application/json',

+ 47 - 9
ak_wordpress.py

@@ -6,11 +6,13 @@
 
 from datetime import datetime
 import json
+from pprint import pprint
 
 from wordpress_xmlrpc import Client
 from wordpress_xmlrpc import WordPressPost, WordPressComment
 from wordpress_xmlrpc.methods.posts import GetPost, NewPost, EditPost
 from wordpress_xmlrpc.methods.comments import NewComment, EditComment
+from wordpress_xmlrpc.methods.taxonomies import GetTerms
 
 from config import *
 
@@ -18,6 +20,19 @@ from config import *
 wp = Client(wp_api_url, wp_user, wp_password) 
 
 
+#
+# Get category from slug name used in the configuration file
+#
+
+def wordpress_get_category(slug):
+     """ Load taxonomy and search for the slug """
+     catlist = wp.call(GetTerms('category'))
+
+     for cat in catlist:
+         if cat.slug == slug:
+             return cat
+
+
 #
 # query post
 #
@@ -42,14 +57,19 @@ def wordpress_get_post(wpid):
 #
 def wordpress_post_by_scopus(data, category = []):
     """ Create a new post based on the Scopus information """
+
+    #print data['abstracts-retrieval-response']
  
     try:
         coredata = data['abstracts-retrieval-response']['coredata']
         authors = data['abstracts-retrieval-response']['authors']['author']
+
     except KeyError:
+        pprint(data)
+        print ""
         print "Have not found authors in dataset"
         print " -> Is the connection to scopus broken???"
-        exit()
+        return(0)
 
     # decode date
     tsstring = coredata['prism:coverDate'].encode('utf-8')
@@ -114,22 +134,40 @@ def wordpress_post_by_scopus(data, category = []):
 
     #print post.content
 
-    post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+    #post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
 
+    catlist = []
+    for slug in category:
+        cat = wordpress_get_category(slug)
+        catlist.append(cat)
+    post.terms = catlist
+
+    try:
+        taglist = []
+        for tag in data['abstracts-retrieval-response']['authkeywords']['author-keyword']:
+            print "Keyword: ", tag
+            taglist.append(tag['$'])
+    except:
+        pass
 
-    if category == '':
-        catlist = ['Publications']
-    else:
-        catlist = ['Publications'] + category
     post.terms_names = {
-            'category': catlist # defined in WP + python script
+            'category': ['Publications'],
+            'post_tag': taglist
         }
 
     # whoops, I forgot to publish it!
-    post.post_status = 'publish' # alternative is draft here !
+    if len(authors) > sc_max_authors:
+        post.post_status = 'draft' # check how to handle publication in wordpress
+        print "Too many authors %d - set to draft" % (len(authors))
+    else:
+        post.post_status = 'publish' # handled as a standard publication
     post.comment_status = 'closed' # allow comments - may be only for scopus
-    wp.call(EditPost(post.id, post))# Update the before created post
 
+    # Todo: this can fail! Add proper error handling 
+    post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+    #wp.call(EditPost(post.id, post))# Update the before created post
+
+    
     # need to update the database !!!
     return post.id
 

+ 21 - 14
etc/config_held_de.py

@@ -115,8 +115,15 @@ matthiasKleifegs = "6602072426"
 
 
 # Definition of workgroups for automatic Scopus publication retrieval
-
+# The further parameters define query options: sc_citations and sc_keywords
+# control whether citations and keywords are used, and sc_max_authors is
+# the author-count limit used to identify collaboration papers.
+#
 sc_start = 2016
+sc_citations = False
+sc_keywords = True
+sc_max_authors = 25
+
 
 dts_wp11 = [michaelFiederle,dorisEckstein,alexanderDierlamm]
 dts_wp12 = [ulrichTrunk,ivanPeric]
@@ -125,7 +132,7 @@ dts_wp14 = [andreasMussgiller]
 
 dts_wp21 = [marcSchneider,ms2,ms3,ms4]
 dts_wp22 = [peterKaever,matthiasBalzer,oliverSander]
-dts_wp23 = [michaelBussmann,andreasKopmann,surenChilingaryan,matthiasVogelgesang]
+dts_wp23 = [michaelBussmann,andreasKopmann,ak2,surenChilingaryan,matthiasVogelgesang]
 
 dts_wp31 = [davidPennicard]
 dts_wp32 = [berndVoss,oliverSchaefer]
@@ -135,18 +142,18 @@ dts_wp35 = [corneliaWunderer]
 
 
 sc_workgroups = [
-{'name':"Semiconductor sensors",'authors':dts_wp11},
-{'name':"ASICs",'authors':dts_wp12},
-{'name':"Electronics packaging",'authors':dts_wp13},
-{'name':"Innovative materials",'authors':dts_wp14},
-{'name':"Optical data transmission",'authors':dts_wp21},
-{'name':"Programmable electronics",'authors':dts_wp22},
-{'name':"Real-time data processing",'authors':dts_wp23},
-{'name':"Helmholtz cube",'authors':dts_wp31},
-{'name':"Compact gaseous detectors",'authors':dts_wp32},
-{'name':"Photon & X-ray detetors",'authors':dts_wp33},
-{'name':"Fast timing detectors",'authors':dts_wp34},
-{'name':"CMOS sensors",'authors':dts_wp35},
+{'name':"sensors",'authors':dts_wp11},
+{'name':"asics",'authors':dts_wp12},
+{'name':"packaging",'authors':dts_wp13},
+{'name':"materials",'authors':dts_wp14},
+{'name':"photonics",'authors':dts_wp21},
+{'name':"electronics",'authors':dts_wp22},
+{'name':"computing",'authors':dts_wp23},
+{'name':"helmholtz-cube",'authors':dts_wp31},
+{'name':"gaseous-detectors",'authors':dts_wp32},
+{'name':"photon-detectors",'authors':dts_wp33},
+{'name':"fast-timing",'authors':dts_wp34},
+{'name':"cmos-sensors",'authors':dts_wp35},
 ]
 """ Definition of the workgroups
     

+ 147 - 0
etc/config_ufo_kit_edu_ipe.py

@@ -0,0 +1,147 @@
+""" Scopus script's configuration
+    
+    *A Kopmann, 12.4.17*
+
+Configuration for the IPE setup at ufo.kit.edu
+
+"""
+
+# Local publication database
+# These four values are handed to pymysql.connect() by rm-scopusid.py.
+# NOTE(review): the database password is stored in clear text and is
+# committed together with this file - consider reading it from the
+# environment or from an untracked local file.
+
+db_host = 'localhost'
+db_user = 'scopus'
+db_pw = '$scopus$'
+db_name = 'scopus_ipe'
+
+
+# Access to Wordpress installation
+# ak_wordpress.py and test-wp.py build their XML-RPC client from these
+# three values: Client(wp_api_url, wp_user, wp_password).
+# NOTE(review): the Wordpress password is committed in clear text as well.
+
+wp_api_url = "https://ufo.kit.edu/ipe/xmlrpc.php"
+""" Access to the Wordpress installation """
+wp_user = "scopus"
+wp_password = "$scopus$"
+
+
+# Reporting
+# NOTE(review): absolute path below /root - presumably the scripts run as
+# root on the target host; confirm before deploying elsewhere.
+
+log_file = "/root/scopus-ipe/log/scopus-publications-ufo-kit-edu-ipe.log"
+""" Logfile name for reporting """
+
+
+# Scopus query definition
+# Sent as the 'X-ELS-APIKey' request header to api.elsevier.com.
+# NOTE(review): a personal API key in a tracked file is exposed to everyone
+# with repository access - consider moving it out of version control.
+
+MY_API_KEY = "14d431d052c2caf5e9c4b1ab7de7463d"
+""" Scopus access key (Andreas Kopmann) """
+
+
+# Scopus author IDs
+
+# KIT, PDV
+andreasKopmann = "35313939900"
+ak2 = "57193311016"
+surenChilingaryan = "15076530600"
+matthiasVogelgesang = "35303862100"
+timoDritschler = "56473578500"
+#andreiShkarin = "56950893700"
+nicholasTanJerome = ""
+#tillBergmann = "35308595100"
+armenBeglarian = "55343303900"
+petraRohr = "40561503300"
+norbertKunka = "35276889200"
+horstDemattio = "6506285395"
+heinzFrankrone = ""
+danielKompalla = ""
+
+
+# KIT, EPS
+micheleCaselle = "57194376511"
+mc2 = "57194376512"
+#urosStevanovic = "55557712600"
+lorenzoRota = "56473442500"
+matthiasBalzer = "35519411500"
+matthiasKleifges = "6602072426"
+luisArdila = "" 
+nickKarcher = "" 
+alexanderMenshikov = "7003298761"
+denisTcherniakhovski = "6508308928"
+thomasSchuh = ""
+oliverSander = "22986354000"
+
+
+# KIT, MSA
+thomasKuehner = "24776279000"
+saschaWuestling = "23480623800"
+heikoBouquet = ""
+birgitBurger = ""
+andreasEbersold = "" 
+larsEisenblaetter = "57094104200"
+juliusHartmann = ""
+djornKarnick = "37081197400"
+marcSchneider = "55649571035"
+ms2 = "55649571037"
+ms3 = "55649571036"
+ms4 = "55649571041"
+oliverKroemer = "8520193800"
+klausPetry = "7004446817"
+
+
+# KIT, ADL
+ivanPeric = "9043482900"
+felixEhrler = "56674370500"
+robertoBlanco = "56927736400"
+alenaWeber = ""
+
+
+# KIT, AVT
+thomasBlank = "56819218800"
+simonBischof = "" 
+anBao = "57192082222"
+helgeWurst = "" 
+
+# KIT, SWM
+nicoleRuiter = "6507953977"
+torstenHopp = "24469880700"
+michaelZapf = "19640815400"
+
+
+# KIT, IPE
+marcWeber = "56654729000"
+mw2 = "56603987800"
+mw3 = "7404138824"
+
+
+
+# Definition of workgroups for automatic Scopus publication retrieval
+#
+# sc_max_authors is compared with the author count in ak_wordpress.py: a
+# publication with more authors is stored as a draft instead of being
+# published.
+# NOTE(review): sc_start, sc_citations and sc_keywords are presumably the
+# first publication year to query and the switches for citation / keyword
+# handling - confirm against the script that runs the Scopus query.
+
+sc_start = 2016
+sc_citations = False
+sc_keywords = True
+sc_max_authors = 25
+
+# Scopus author IDs per workgroup. Authors whose ID is still the empty
+# string are not listed in any group.
+# NOTE(review): anBao and marcWeber (mw2, mw3) have IDs but are not part of
+# any group - confirm that this is intended.
+ipe_pdv = [andreasKopmann,ak2,surenChilingaryan,matthiasVogelgesang,timoDritschler,armenBeglarian,horstDemattio,petraRohr,norbertKunka]
+ipe_eps = [matthiasBalzer,lorenzoRota,micheleCaselle,mc2,matthiasKleifges,alexanderMenshikov,denisTcherniakhovski,oliverSander]
+ipe_msa = [thomasKuehner,saschaWuestling,larsEisenblaetter,djornKarnick,marcSchneider,ms2,ms3,ms4,oliverKroemer,klausPetry]
+ipe_avt = [thomasBlank]
+ipe_adl = [ivanPeric,felixEhrler,robertoBlanco]
+ipe_swm = [nicoleRuiter,torstenHopp,michaelZapf] 
+
+
+# One entry per workgroup: 'name' is the Wordpress category for the group,
+# 'authors' the list of Scopus author IDs defined above.
+sc_workgroups = [
+{'name':"pdv",'authors':ipe_pdv},
+{'name':"eps",'authors':ipe_eps},
+{'name':"msa",'authors':ipe_msa},
+{'name':"avt",'authors':ipe_avt},
+{'name':"adl",'authors':ipe_adl},
+{'name':"swm",'authors':ipe_swm}
+]
+""" Definition of the workgroups
+    
+    Each workgroup is defined by a list of Scopus IDs and the
+    name of the category to be used in Wordpress. The category
+    for a new workgroup has to be created in Wordpress before
+    adding publications.
+    """
+
+
+
+

+ 13 - 0
log/scopus-publications-held-de.log

@@ -15,3 +15,16 @@
 2018-03-21	18:29:03	  553	    0	 3050	    0	0:00:26.512658
 2018-03-21	18:30:17	  553	    0	 3050	    0	0:00:09.612537
 2018-03-21	18:30:53	  554	    1	 3050	    0	0:00:50.147331
+2018-05-18	09:41:42	  561	    0	     	     	0:00:00.026463
+2018-05-18	09:42:11	  561	   14	     	     	0:02:33.279780
+2018-05-18	09:49:56	  575	    0	     	     	0:00:52.256674
+2018-05-18	10:42:33	   51	    2	     	     	0:00:23.904634
+2018-05-18	11:25:15	   53	  196	     	     	0:22:02.473417
+2018-05-18	14:10:34	  344	  119	     	     	0:07:25.366567
+2018-05-18	14:37:46	  460	    8	     	     	0:01:36.838421
+2018-05-18	15:36:54	  661	  110	     	     	0:07:38.430624
+2018-05-18	17:53:40	  771	  178	     	     	0:19:25.439346
+2018-05-18	20:25:32	  948	    0	     	     	0:03:00.877839
+2018-06-12	23:25:24	  948	   24	     	     	0:09:37.270052
+2018-06-12	23:39:40	  972	    0	     	     	0:01:19.301210
+2018-08-02	13:58:27	  972	   55	     	     	0:07:36.934371

+ 33 - 0
rm-scopusid.py

@@ -0,0 +1,33 @@
+# Remove scopus ID from publication database
+#
+
+import sys
+
+# Mysql persistent data (Account: scopus, $scopus$)
+import pymysql.cursors
+import pymysql
+
+from config import *
+
+
+if len(sys.argv) > 1:
+
+    scopusid = sys.argv[1]
+
+    # Connect to the database
+    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
+
+    try:
+        with connection.cursor() as cursor:
+
+            sql = "DELETE FROM publications WHERE scopusid=\"%s\" " % scopusid
+            print sql;
+            cursor.execute(sql)
+            connection.commit()
+
+    finally:
+        connection.close()
+
+
+
+

+ 8 - 8
test-scopus.py

@@ -191,11 +191,11 @@ def get_scopus_brief(SCOPUS_ID, max_authors=1000):
 # List of newly cited publications
 #
 
+print get_scopus_info("SCOPUS_ID:84969498463")
 
-
-resp = requests.get("http://api.elsevier.com/content/author?author_id="+ak+"&view=metrics",
-            headers={'Accept':'application/json',
-                             'X-ELS-APIKey': MY_API_KEY})
+#resp = requests.get("http://api.elsevier.com/content/author?author_id="+ak+"&view=metrics",
+#            headers={'Accept':'application/json',
+#                             'X-ELS-APIKey': MY_API_KEY})
 
 #print resp
 
@@ -215,7 +215,7 @@ resp = requests.get("http://api.elsevier.com/content/author?author_id="+ak+"&vie
 #publist = get_scopus_list(ufo_ips, 'PUBYEAR = 2015', 30)
 
 # Exclude authors?
-publist = get_scopus_list(pdv, 'NOT AU-ID(7006284555)', 10)
+#publist = get_scopus_list(pdv, 'NOT AU-ID(7006284555)', 10)
 
 
 # Author ausschliessen - black list !!!
@@ -224,10 +224,10 @@ publist = get_scopus_list(pdv, 'NOT AU-ID(7006284555)', 10)
 #
 # Display the result
 #
-print "Number of publications: %d" % len(publist)
+#print "Number of publications: %d" % len(publist)
 
-for pub in publist:
-    print get_scopus_brief(pub,10000)
+#for pub in publist:
+#    print get_scopus_brief(pub,10000)
 
 
 # Test printing functions

+ 33 - 7
test-wp.py

@@ -9,9 +9,20 @@ from wordpress_xmlrpc import WordPressPost, WordPressComment
 from wordpress_xmlrpc.methods.posts import GetPosts, NewPost, EditPost
 from wordpress_xmlrpc.methods.comments import NewComment, EditComment
 from wordpress_xmlrpc.methods.users import GetUserInfo
+from wordpress_xmlrpc.methods.taxonomies import GetTerms
 
 from config import *
 
+def wordpress_get_category(slug):
+     # Load taxonomy and search for the slug
+     catlist = wp.call(GetTerms('category'))
+
+     for cat in catlist:
+         if cat.slug == slug:
+             return cat
+
+
+
 # Use Wordpress account - not the mysql credentials
 # Todo: use scopus later !!!
 wp = Client(wp_api_url,wp_user,wp_password)
@@ -19,23 +30,38 @@ wp = Client(wp_api_url,wp_user,wp_password)
 
 #print wp.call(GetUserInfo())
 
+# Test access to categories
+categories = []
+cat = wordpress_get_category("sensors")
+
+if cat: 
+    print "Found ", cat.name
+    categories.append(cat)
+
+
 # Todo: Set the date of the post according to the scopus date
 
 post = WordPressPost()
 post.title = 'My post 7' # put title of the publication here
 post.slug = 'DOIxxxxx7' # set the name of the post different to the title
 post.content = 'This is a more complete example post about XML-RPC (but still not comlete enough)'
-post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+#post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+
+post.terms = categories
 
 post.terms_names = {
-    # 'post_tag': ['test', 'firstpost'], # what's that? I don't use it currently
-    'category': ['Publications', 'Reports'] # defined in WP + python script
+    #'post_tag': ['test', 'firstpost'], # what's that? I don't use it currently
+    'category': ['Publications', 'asics'] # defined in WP + python script
 }
 
 # whoops, I forgot to publish it!
 post.post_status = 'publish' # alternative is draft here !
 post.comment_status = 'open' # allow comments - may be only for scopus
-wp.call(EditPost(post.id, post))# Update the before created post
+post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+
+#wp.call(EditPost(post.id, post))# Update the before created post
+
+print "Created Wordpress post ", post.id
 
 # Todo:
 # Save the id in the publication table, together with the with the scopus id
@@ -50,10 +76,10 @@ wp.call(EditPost(post.id, post))# Update the before created post
 # or my biotech items???
 #
 
-comment = WordPressComment()
-comment.content = 'Hi, thats cool - we can also add our comments automatically'
+#comment = WordPressComment()
+#comment.content = 'Hi, thats cool - we can also add our comments automatically'
 
-comment.id = wp.call(NewComment(post.id, comment))
+#comment.id = wp.call(NewComment(post.id, comment))
 
 
 

+ 2 - 2
test-wp2.py

@@ -30,10 +30,10 @@ if len(sys.argv) > 1:
 # Read post
 try:
     post = wp.call(GetPost(wpid))
-    print "Post %d: %s" %(wpid,post.title)
+    print ("Post %d: %s" %(wpid,post.title))
 
 except:
-    print "Post %d seems to be not available" % wpid
+    print ("Post %d seems to be not available" % wpid)