Browse Source

Merge branch 'master' of https://fuzzy.fzk.de/gogs/kopmann/scopus

Andreas Kopmann 3 years ago
parent
commit
85a1f4e17f

+ 1 - 0
.gitignore

@@ -1,5 +1,6 @@
 *config.py
 *.pyc
+*.swp
 info/
 scopus/
 scopus-master/

+ 157 - 0
README.md

@@ -156,6 +156,163 @@ This would have the nice effect, that all student work is organized systematical
 
 - keywords
 
+## Configuration
+
+For each site, the database, the access to Wordpress and the author profiles need to 
+be configured in `config.py`. 
+
+For the UFO webpage the configuration looks as shown below. In the first part the access to the database is configured. The database is used as 
+a cache to keep track of which publications are already available in 
+Wordpress.
+
+In the second block the access to the Wordpress server is given. The 
+specified user (e.g. called `scopus`) needs to have editor permissions in order to 
+submit new posts and to suggest keywords.  
+
+For the access to scopus a key is required.
+
+The main part of the configuration is dedicated to author identification 
+and grouping of authors. At first, variables for each author are defined.
+If an author is registered with more than one Scopus ID, these additional IDs
+should be added as well.
+
+Finally, in the variable `sc_workgroups` a named list of all groups
+of authors that should be considered is defined. The names of the groups
+need to be defined in Wordpress as categories. The slug name of the 
+category in Wordpress is used in the configuration file as identification
+of the author group.
+
+
+Config.py:
+
+```
+""" Scopus script's configuration
+    
+    *A Kopmann, 12.4.17*
+
+Configuration for the active setup at ufo.kit.edu
+
+"""
+
+# Local publication database
+
+db_host = 'localhost'
+db_user = 'scopus'
+db_pw = '$scopus$'
+db_name = 'scopus'
+
+
+# Access to Wordpress installation
+
+wp_api_url = "https://ufo.kit.edu/dis/xmlrpc.php"
+""" Access to the Wordpress installation """
+wp_user = "scopus"
+wp_password = "$scopus$"
+
+
+# Reporting
+
+log_file = "/root/scopus/log/scopus-publications-ufo-kit-edu.log"
+""" Logfile name for reporting """
+
+
+# Scopus query definition
+
+MY_API_KEY = "14d431d052c2caf5e9c4b1ab7de7463d"
+""" Scopus access key (Andreas Kopmann) """
+DTS_API_KEY = "f2b35fe46478f22f3c14cf53f73d4f93"
+
+# Scopus author IDs
+
+# KIT, PDV
+ak = "35313939900"
+ak2 = "57193311016"
+csa = "15076530600"
+matthiasVogelgesang = "35303862100"
+timoDritschler = "56473578500"
+andreiShkarin = "56950893700"
+nicholasTanJerome = "57200247965"
+tillBergmann = "35308595100"
+armenBeglarian = "55343303900"
+petraRohr = "40561503300"
+norbertKunka = "35276889200"
+horstDemattio = "6506285395"
+
+# KIT, EPS
+micheleCaselle = "57194376511"
+mc2 = "57194376512"
+urosStevanovic = "55557712600"
+lorenzoRota = "56473442500"
+matthiasBalzer = "35519411500"
+
+# KIT, IPE
+marcWeber = "56654729000"
+mw2 = "56603987800"
+mw3 = "7404138824"
+
+# KIT, IPE
+marcWeber = "56654729000"
+mw2 = "56603987800"
+mw3 = "7404138824"
+
+# KIT, IPS
+tomyRolo = "56118820400"
+tr2 = "35194644400"
+tr3 = "35277157300"
+tomasFarago = "56655045700"
+alexyErshof = "56441809800"
+romanShkarin = "56951331000"
+tiloBaumbach = "7003270957"
+thomasVandekamp = "46761453500"
+danielHaenschke = "55532222200"
+
+# TUD
+michaelHeethoff = "55979397800"
+sebastianSchmelzle = "34768986100"
+
+# UHD, has been combined with another person in Munich !!!
+philipLoesel = "57203423658"
+
+# Others (e.g. for black list)
+ashotChiligarian = "7004126133"
+hansBluemer = "7006284555"
+matthiasKleifegs = "6602072426"
+
+# Definition of workgroups for automatic Scopus publication retrieval
+
+sc_start = 2010
+sc_citations = False
+sc_keywords = True
+sc_max_authors = 25
+
+
+ufo_pdv = [ak, ak2, csa, matthiasVogelgesang, timoDritschler ]
+ufo_eps = [matthiasBalzer, lorenzoRota, micheleCaselle, mc2 ]
+ufo_ips = [tomyRolo, tr2, tr3, tomasFarago, danielHaenschke]
+ufo_apps = [thomasVandekamp]
+ufo_alg = [philipLoesel]
+
+sc_workgroups = [
+{'name':'computing','authors':ufo_pdv},
+{'name':'electronics','authors':ufo_eps},
+{'name':'x-ray-imaging','authors':ufo_ips},
+{'name':'morphology','authors':ufo_apps},
+{'name':'algorithms','authors':ufo_alg}
+]
+""" Definition of the workgroups
+    
+    Each workgroup is defined by a list of Scopus ID's and the
+    name of the category to be used in Wordpress. The category
+    for a new workgroup has to be created in Wordpress before
+    adding publications
+    """
+
+
+```
+
+
+
+
 
 ## Structure of the database
 

+ 1 - 1
ak_scopus.py

@@ -114,7 +114,7 @@ def get_scopus_data(SCOPUS_ID):
     url = ("http://api.elsevier.com/content/abstract/scopus_id/"
            + SCOPUS_ID
            + "?field=article-number,title,publicationName,volume,issueIdentifier,"
-           + "prism:pageRange,coverDate,article-number,eid,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn")
+           + "prism:pageRange,coverDate,article-number,eid,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn,authkeywords")
     #print url
     resp = requests.get(url,
             headers={'Accept':'application/json',

+ 80 - 32
ak_wordpress.py

@@ -6,11 +6,13 @@
 
 from datetime import datetime
 import json
+from pprint import pprint
 
 from wordpress_xmlrpc import Client
 from wordpress_xmlrpc import WordPressPost, WordPressComment
 from wordpress_xmlrpc.methods.posts import GetPost, NewPost, EditPost
 from wordpress_xmlrpc.methods.comments import NewComment, EditComment
+from wordpress_xmlrpc.methods.taxonomies import GetTerms
 
 from config import *
 
@@ -18,6 +20,19 @@ from config import *
 wp = Client(wp_api_url, wp_user, wp_password) 
 
 
+#
+# Get category from slug name used in the configuration file
+#
+
+def wordpress_get_category(slug):
+     """ Load taxonomy and search for the slug """
+     catlist = wp.call(GetTerms('category'))
+
+     for cat in catlist:
+         if cat.slug == slug:
+             return cat
+
+
 #
 # query post
 #
@@ -42,14 +57,19 @@ def wordpress_get_post(wpid):
 #
 def wordpress_post_by_scopus(data, category = []):
     """ Create a new post based on the Scopus information """
+
+    #print data['abstracts-retrieval-response']
  
-    coredata = data['abstracts-retrieval-response']['coredata']
     try:
+        coredata = data['abstracts-retrieval-response']['coredata']
         authors = data['abstracts-retrieval-response']['authors']['author']
+
     except KeyError:
+        pprint(data)
+        print ""
         print "Have not found authors in dataset"
         print " -> Is the connection to scopus broken???"
-        exit()
+        return(0)
 
     # decode date
     tsstring = coredata['prism:coverDate'].encode('utf-8')
@@ -62,74 +82,102 @@ def wordpress_post_by_scopus(data, category = []):
 
     # define post structure
     post = WordPressPost()
-    post.title = coredata['dc:title'].encode('utf-8')
+    post.title = coredata['dc:title']
     post.date = ts
 
     # set the name of the post different to the title
-    post.slug = coredata['dc:identifier'].encode('utf-8')
+    post.slug = coredata['dc:identifier']
     
-    post.excerpt = authors[0]['ce:indexed-name'].encode('utf-8')
+    post.excerpt = authors[0]['ce:indexed-name']
     if len(authors) > 2:
-        post.excerpt += " et al."
+        post.excerpt += u' et al.'
     elif len(authors) == 2:
-        post.excerpt += u', ' + authors[1]['ce:indexed-name'].encode('utf-8')
-    post.excerpt += u', in <em>' + coredata['prism:publicationName'].encode('utf-8') + u'</em>'
+        post.excerpt += u', ' + authors[1]['ce:indexed-name']
+    post.excerpt += u', in <em>' + coredata['prism:publicationName'] + u'</em>'
     if 'prism:volume' in coredata:
-        post.excerpt += u', ' + coredata['prism:volume'].encode('utf-8')
+        post.excerpt += u', ' + coredata['prism:volume']
     post.excerpt += u' (' + str(year).encode('utf-8') + u')'
     if 'prism:pageRange' in coredata:
-        post.excerpt += u' ' + coredata['prism:pageRange'].encode('utf-8')
+        post.excerpt += u' ' + coredata['prism:pageRange']
     if 'article-number' in coredata:
-        post.excerpt += u', ' + coredata['article-number'].encode('utf-8')
+        post.excerpt += u', ' + coredata['article-number']
     post.excerpt += u'.'
 
-    post.content = u'<p>' + authors[0]['ce:indexed-name'].encode('utf-8')
+    post.content = u'<p>' + authors[0]['ce:indexed-name']
     authors.pop(0)
     if len(authors) > 20:
-        post.content += " et al."
+        post.content += u' et al.'
     else:
         for author in authors:
-            post.content += u', ' + author['ce:indexed-name'].encode('utf-8')
+            post.content += u', ' + author['ce:indexed-name']
     post.content += u'</p>'
-    post.content += u'<p>in <em>' + coredata['prism:publicationName'].encode('utf-8') + u'</em>'
+    post.content += u'<p>in <em>' + coredata['prism:publicationName'] + u'</em>'
     if 'prism:volume' in coredata:
-        post.content += u', ' + coredata['prism:volume'].encode('utf-8')
+        post.content += u', ' + coredata['prism:volume']
     post.content += u' (' + str(year).encode('utf-8') + u')'
     if 'prism:pageRange' in coredata:
-        post.content += u' ' + coredata['prism:pageRange'].encode('utf-8')
+        post.content += u' ' + coredata['prism:pageRange']
     if 'article-number' in coredata:
-        post.content += u', ' + coredata['article-number'].encode('utf-8')
+        post.content += u', ' + coredata['article-number']
     post.content += u'.'
     if 'prism:doi' in coredata:
-        post.content += u' DOI:' + coredata['prism:doi'].encode('utf-8')
+        post.content += u' DOI:' + coredata['prism:doi']
     post.content += u'</p>\n\n'
     if 'dc:description' in coredata:
         post.content += u'<div class="accordion-inner"><h4>Abstract</h4>' + coredata['dc:description']
         if 'authkeywords' in coredata:
-            post.content += u'\n<b>Keywords:</b> ' + coredata['authkeywords'].encode('utf-8')
+            post.content += u'\n<b>Keywords:</b> ' + coredata['authkeywords']
         post.content += u'</div>'
     if 'prism:doi' in coredata:
-        link = u'http://dx.doi.org/' + coredata['prism:doi'].encode('utf-8')
+        link = u'http://dx.doi.org/' + coredata['prism:doi']
         post.content += u'\n\n<div class="accordion-inner"><a class="btn btn-primary" href="' + link + u'"><i class="icon-download icon-white"></i> Get it</a></div>'
 
     #print post.content
+    #post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+
+    catlist = []
+    for slug in category:
+        try:
+           cat = wordpress_get_category(slug)
+           catlist.append(cat)
+        except: 
+           print "Slug %s not found in Wordpress" % slug
+           exit
+    post.terms = catlist
+    print post.terms
+
+    taglist = []
+    try:
+        for tag in data['abstracts-retrieval-response']['authkeywords']['author-keyword']:
+            try:
+                print "Keyword: ", tag
+                taglist.append(tag['$'].decode('utf-8','ignore'))
+            except:
+                print "Keyword contains special characters - droped!"
+                pass
+    except:
+        # No keywords given
+	pass
 
-    post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
-
-
-    if category == '':
-        catlist = ['Publications']
-    else:
-        catlist = ['Publications'] + category
     post.terms_names = {
-            'category': catlist # defined in WP + python script
-        }
+        'category': ['Publications'],
+        'post_tag': taglist
+    }
+    print post.terms_names
 
     # whoops, I forgot to publish it!
-    post.post_status = 'publish' # alternative is draft here !
+    if len(authors) > sc_max_authors:
+        post.post_status = 'draft' # check how to handle publication in wordpress
+        print "Too many authors %d - set to draft" % (len(authors))
+    else:
+        post.post_status = 'publish' # handled as a standard publication
     post.comment_status = 'closed' # allow comments - may be only for scopus
-    wp.call(EditPost(post.id, post))# Update the before created post
 
+    # Todo: this can fail! Add proper error handling 
+    post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+    #wp.call(EditPost(post.id, post))# Update the before created post
+    print post.id
+    
     # need to update the database !!!
     return post.id
 

+ 3 - 2
config.py.sample

@@ -45,7 +45,8 @@ norbertKunka = "35276889200"
 horstDemattio = "6506285395"
 
 # KIT, EPS
-micheleCaselle = "7006767859"
+micheleCaselle = "57194376511"
+mc2 = "57194376512"
 urosStevanovic = "55557712600"
 lorenzoRota = "56473442500"
 matthiasBalzer = "35519411500"
@@ -83,7 +84,7 @@ matthiasKleifegs = "6602072426"
 sc_start = 2010
 
 ufo_pdv = [ak, csa, matthiasVogelgesang, timoDritschler, andreiShkarin ]
-ufo_eps = [matthiasBalzer, lorenzoRota, micheleCaselle ]
+ufo_eps = [matthiasBalzer, lorenzoRota, micheleCaselle, mc2 ]
 ufo_ips = [tomyRolo, tr2, tr3, tomasFarago]
 ufo_apps = [thomasVandekamp]
 ufo_alg = [philipLoesel]

+ 168 - 0
etc/config_held_de.py

@@ -0,0 +1,168 @@
+""" Scopus script's configuration
+    
+    *A Kopmann, 20.3.2018*
+
+Configuration for the test setup ufo.kit.edu/held (Helmholtz Detectors)
+
+"""
+
+# Local publication database
+
+db_host = 'localhost'
+db_user = 'scopus'
+db_pw = '$scopus$'
+db_name = 'scopus_held'
+
+
+# Access to Wordpress installation
+
+wp_api_url = "https://ufo.kit.edu/held/xmlrpc.php"
+""" Access to the Wordpress installation """
+wp_user = "scopus1"
+wp_password = "$scopus$"
+
+
+# Reporting
+
+log_file = "/root/scopus-held/log/scopus-publications-held-de.log"
+""" Logfile name for reporting """
+
+
+# Scopus query definition
+
+MY_API_KEY = "14d431d052c2caf5e9c4b1ab7de7463d"
+""" Scopus access key (Andreas Kopmann) """
+
+
+# Scopus author IDs
+
+# KIT, PDV
+andreasKopmann = "35313939900"
+ak2 = "57193311016"
+surenChilingaryan = "15076530600"
+matthiasVogelgesang = "35303862100"
+timoDritschler = "56473578500"
+andreiShkarin = "56950893700"
+nicholasTanJerome = "57200247965"
+tillBergmann = "35308595100"
+armenBeglarian = "55343303900"
+petraRohr = "40561503300"
+norbertKunka = "35276889200"
+horstDemattio = "6506285395"
+
+# KIT, EPS
+micheleCaselle = "57194376511"
+mc2 = "57194376512"
+urosStevanovic = "55557712600"
+lorenzoRota = "56473442500"
+matthiasBalzer = "35519411500"
+oliverSander = "22986354000" 
+
+# KIT, IPE
+marcWeber = "56654729000"
+mw2 = "56603987800"
+mw3 = "7404138824"
+
+# KIT, IPS
+tomyRolo = "56118820400"
+tr2 = "35194644400"
+tr3 = "35277157300"
+tomasFarago = "56655045700"
+alexyErshof = "56441809800"
+romanShkarin = "56951331000"
+tiloBaumbach = "7003270957"
+thomasVandekamp = "46761453500"
+
+# TUD
+michaelHeethoff = "55979397800"
+sebastianSchmelzle = "34768986100"
+
+# UHD
+philipLoesel = "57190622016"
+
+# DTS
+heinzGraafsma = "7003866547"
+michaelFiederle = "56889294000"
+dorisEckstein = "7006469870"
+alexanderDierlamm = "6603122027"
+ulrichTrunk = "6602643714"
+ivanPeric = "9043482900"
+thomasBlank = "56819218800"
+karstenHansen = "7401918494"
+andreasMussgiller = "35278917500" 
+jimRitman = "6701495633"
+marcSchneider = "55649571035"
+ms2 = "55649571037"
+ms3 = "55649571036"
+ms4 = "55649571041"
+peterKaever = "55955104200"
+tobiasStockmanns = "8722644300"
+michaelBussmann = "6701512276"
+christianSchmidt = "55547128858"
+davidPennicard = "12751824800"
+berndVoss = "22965153700"
+oliverSchaefer = "56754175600"
+felixSefkow = "8062394000"
+andreasHaugns = "10538884100"
+mladenKis = "7003935197"
+corneliaWunderer = "6603143355"
+
+
+# Others (e.g. for black list)
+ashotChiligarian = "7004126133"
+hansBluemer = "7006284555"
+matthiasKleifegs = "6602072426"
+
+
+# Definition of workgroups for automatic Scopus publication retrieval
+# Other parameters define query options. They control whether citations
+# or keywords should be used. The parameter sc_max_authors defines the limit
+# used to identify collaboration papers.
+#
+sc_start = 2016
+sc_citations = False
+sc_keywords = True
+sc_max_authors = 25
+
+
+dts_wp11 = [michaelFiederle,dorisEckstein,alexanderDierlamm]
+dts_wp12 = [ulrichTrunk,ivanPeric]
+dts_wp13 = [thomasBlank,karstenHansen]
+dts_wp14 = [andreasMussgiller]
+
+dts_wp21 = [marcSchneider,ms2,ms3,ms4]
+dts_wp22 = [peterKaever,matthiasBalzer,oliverSander]
+dts_wp23 = [michaelBussmann,andreasKopmann,ak2,surenChilingaryan,matthiasVogelgesang]
+
+dts_wp31 = [davidPennicard]
+dts_wp32 = [berndVoss,oliverSchaefer]
+dts_wp33 = [felixSefkow] 
+dts_wp34 = [mladenKis,micheleCaselle,lorenzoRota]
+dts_wp35 = [corneliaWunderer]
+
+
+sc_workgroups = [
+{'name':"sensors",'authors':dts_wp11},
+{'name':"asics",'authors':dts_wp12},
+{'name':"packaging",'authors':dts_wp13},
+{'name':"materials",'authors':dts_wp14},
+{'name':"photonics",'authors':dts_wp21},
+{'name':"electronics",'authors':dts_wp22},
+{'name':"computing",'authors':dts_wp23},
+{'name':"helmholtz-cube",'authors':dts_wp31},
+{'name':"gaseous-detectors",'authors':dts_wp32},
+{'name':"photon-detectors",'authors':dts_wp33},
+{'name':"fast-timing",'authors':dts_wp34},
+{'name':"cmos-sensors",'authors':dts_wp35},
+]
+""" Definition of the workgroups
+    
+    Each workgroup is defined by a list of Scopus ID's and the
+    name of the category to be used in Wordpress. The category
+    for a new workgroup has to be created in Wordpress before
+    adding publications
+    """
+
+
+
+

+ 21 - 13
etc/config_ufo_kit_edu.py

@@ -32,7 +32,7 @@ log_file = "/root/scopus/log/scopus-publications-ufo-kit-edu.log"
 
 MY_API_KEY = "14d431d052c2caf5e9c4b1ab7de7463d"
 """ Scopus access key (Andreas Kopmann) """
-
+DTS_API_KEY = "f2b35fe46478f22f3c14cf53f73d4f93"
 
 # Scopus author IDs
 
@@ -43,7 +43,7 @@ csa = "15076530600"
 matthiasVogelgesang = "35303862100"
 timoDritschler = "56473578500"
 andreiShkarin = "56950893700"
-nicholasTanJerome = ""
+nicholasTanJerome = "57200247965"
 tillBergmann = "35308595100"
 armenBeglarian = "55343303900"
 petraRohr = "40561503300"
@@ -51,10 +51,13 @@ norbertKunka = "35276889200"
 horstDemattio = "6506285395"
 
 # KIT, EPS
-micheleCaselle = "7006767859"
+micheleCaselle = "57194376511"
+mc2 = "57194376512"
 urosStevanovic = "55557712600"
 lorenzoRota = "56473442500"
 matthiasBalzer = "35519411500"
+luisArdila = "57183639000"
+
 
 # KIT, IPE
 marcWeber = "56654729000"
@@ -70,13 +73,14 @@ alexyErshof = "56441809800"
 romanShkarin = "56951331000"
 tiloBaumbach = "7003270957"
 thomasVandekamp = "46761453500"
+danielHaenschke = "55532222200"
 
 # TUD
 michaelHeethoff = "55979397800"
 sebastianSchmelzle = "34768986100"
 
-# UHD
-philipLoesel = "57190622016"
+# UHD, has been combined with another person in Munich !!!
+philipLoesel = "57203423658"
 
 # Others (e.g. for black list)
 ashotChiligarian = "7004126133"
@@ -87,19 +91,23 @@ matthiasKleifegs = "6602072426"
 # Definition of workgroups for automatic Scopus publication retrieval
 
 sc_start = 2010
+sc_citations = False
+sc_keywords = True
+sc_max_authors = 25
+
 
-ufo_pdv = [ak, ak2, csa, matthiasVogelgesang, timoDritschler ]
-ufo_eps = [matthiasBalzer, lorenzoRota, micheleCaselle ]
-ufo_ips = [tomyRolo, tr2, tr3, tomasFarago]
+ufo_pdv = [ak, ak2, csa, timoDritschler ]
+ufo_eps = [matthiasBalzer, luisArdila ]
+ufo_ips = [tomasFarago, danielHaenschke]
 ufo_apps = [thomasVandekamp]
 ufo_alg = [philipLoesel]
 
 sc_workgroups = [
-{'name':"Computing",'authors':ufo_pdv},
-{'name':"Electronics",'authors':ufo_eps},
-{'name':"X-ray Imaging",'authors':ufo_ips},
-{'name':"Morphology",'authors':ufo_apps},
-{'name':"Algorithms",'authors':ufo_alg}
+{'name':'computing','authors':ufo_pdv},
+{'name':'electronics','authors':ufo_eps},
+{'name':'x-ray-imaging','authors':ufo_ips},
+{'name':'morphology','authors':ufo_apps},
+{'name':'algorithms','authors':ufo_alg}
 ]
 """ Definition of the workgroups
     

+ 147 - 0
etc/config_ufo_kit_edu_ipe.py

@@ -0,0 +1,147 @@
+""" Scopus script's configuration
+    
+    *A Kopmann, 12.4.17*
+
+Configuration for the IPE setup at ufo.kit.edu
+
+"""
+
+# Local publication database
+
+db_host = 'localhost'
+db_user = 'scopus'
+db_pw = '$scopus$'
+db_name = 'scopus_ipe'
+
+
+# Access to Wordpress installation
+
+wp_api_url = "https://ufo.kit.edu/ipe/xmlrpc.php"
+""" Access to the Wordpress installation """
+wp_user = "scopus"
+wp_password = "$scopus$"
+
+
+# Reporting
+
+log_file = "/root/scopus-ipe/log/scopus-publications-ufo-kit-edu-ipe.log"
+""" Logfile name for reporting """
+
+
+# Scopus query definition
+
+MY_API_KEY = "14d431d052c2caf5e9c4b1ab7de7463d"
+""" Scopus access key (Andreas Kopmann) """
+
+
+# Scopus author IDs
+
+# KIT, PDV
+andreasKopmann = "35313939900"
+ak2 = "57193311016"
+surenChilingaryan = "15076530600"
+matthiasVogelgesang = "35303862100"
+timoDritschler = "56473578500"
+#andreiShkarin = "56950893700"
+nicholasTanJerome = "57200247965"
+#tillBergmann = "35308595100"
+armenBeglarian = "55343303900"
+petraRohr = "40561503300"
+norbertKunka = "35276889200"
+horstDemattio = "6506285395"
+heinzFrankrone = ""
+danielKompalla = ""
+
+
+# KIT, EPS
+micheleCaselle = "57194376511"
+mc2 = "57194376512"
+#urosStevanovic = "55557712600"
+lorenzoRota = "56473442500"
+matthiasBalzer = "35519411500"
+matthiasKleifges = "6602072426"
+luisArdila = "" 
+nickKarcher = "" 
+alexanderMenshikov = "7003298761"
+denisTcherniakhovski = "6508308928"
+thomasSchuh = ""
+oliverSander = "22986354000"
+
+
+# KIT, MSA
+thomasKuehner = "24776279000"
+saschaWuestling = "23480623800"
+heikoBouquet = ""
+birgitBurger = ""
+andreasEbersold = "" 
+larsEisenblaetter = "57094104200"
+juliusHartmann = ""
+djornKarnick = "37081197400"
+marcSchneider = "55649571035"
+ms2 = "55649571037"
+ms3 = "55649571036"
+ms4 = "55649571041"
+oliverKroemer = "8520193800"
+klausPetry = "7004446817"
+
+
+# KIT, ADL
+ivanPeric = "9043482900"
+felixEhrler = "56674370500"
+robertoBlanco = "56927736400"
+alenaWeber = ""
+
+
+# KIT, AVT
+thomasBlank = "56819218800"
+simonBischof = "" 
+anBao = "57192082222"
+helgeWurst = "" 
+
+# KIT, SWM
+nicoleRuiter = "6507953977"
+torstenHopp = "24469880700"
+michaelZapf = "19640815400"
+
+
+# KIT, IPE
+marcWeber = "56654729000"
+mw2 = "56603987800"
+mw3 = "7404138824"
+
+
+
+# Definition of workgroups for automatic Scopus publication retrieval
+
+sc_start = 2016
+sc_citations = False
+sc_keywords = True
+sc_max_authors = 25
+
+ipe_pdv = [andreasKopmann,ak2,surenChilingaryan,matthiasVogelgesang,timoDritschler,armenBeglarian,horstDemattio,petraRohr,norbertKunka]
+ipe_eps = [matthiasBalzer,lorenzoRota,micheleCaselle,mc2,matthiasKleifges,alexanderMenshikov,denisTcherniakhovski,oliverSander]
+ipe_msa = [thomasKuehner,saschaWuestling,larsEisenblaetter,djornKarnick,marcSchneider,ms2,ms3,ms4,oliverKroemer,klausPetry]
+ipe_avt = [thomasBlank]
+ipe_adl = [ivanPeric,felixEhrler,robertoBlanco]
+ipe_swm = [nicoleRuiter,torstenHopp,michaelZapf] 
+
+
+sc_workgroups = [
+{'name':"pdv",'authors':ipe_pdv},
+{'name':"eps",'authors':ipe_eps},
+{'name':"msa",'authors':ipe_msa},
+{'name':"avt",'authors':ipe_avt},
+{'name':"adl",'authors':ipe_adl},
+{'name':"swm",'authors':ipe_swm}
+]
+""" Definition of the workgroups
+    
+    Each workgroup is defined by a list of Scopus ID's and the
+    name of the category to be used in Wordpress. The category
+    for a new workgroup has to be created in Wordpress before
+    adding publications
+    """
+
+
+
+

+ 30 - 0
log/scopus-publications-held-de.log

@@ -0,0 +1,30 @@
+/root/scopus-held/scopus_get_publications.py
+
+      Date	    Time	NPubs	NNewP	NCite	NNewC	          TRun
+------------------------------------------------------------------------------
+2018-03-20	11:32:16	    2	    2	    1	    1	0:00:09.030294
+2018-03-20	11:36:50	    6	    4	    4	    3	0:00:22.298699
+2018-03-20	11:38:24	    6	    0	    4	    0	0:00:00.572656
+2018-03-20	11:40:04	  121	  115	  457	  453	0:25:36.380202
+2018-03-20	20:32:54	  199	   78	  743	  286	0:15:33.520534
+2018-03-21	14:45:45	  435	    0	 2367	 1624	1:27:40.148543
+2018-03-21	16:26:27	  435	    0	 2367	    0	0:00:00.029061
+2018-03-21	16:36:16	  553	    0	 3050	  683	0:37:06.826196
+2018-03-21	18:03:06	  553	    0	 3050	    0	0:00:20.440720
+2018-03-21	18:04:17	  553	    1	 3050	    0	0:00:19.556630
+2018-03-21	18:29:03	  553	    0	 3050	    0	0:00:26.512658
+2018-03-21	18:30:17	  553	    0	 3050	    0	0:00:09.612537
+2018-03-21	18:30:53	  554	    1	 3050	    0	0:00:50.147331
+2018-05-18	09:41:42	  561	    0	     	     	0:00:00.026463
+2018-05-18	09:42:11	  561	   14	     	     	0:02:33.279780
+2018-05-18	09:49:56	  575	    0	     	     	0:00:52.256674
+2018-05-18	10:42:33	   51	    2	     	     	0:00:23.904634
+2018-05-18	11:25:15	   53	  196	     	     	0:22:02.473417
+2018-05-18	14:10:34	  344	  119	     	     	0:07:25.366567
+2018-05-18	14:37:46	  460	    8	     	     	0:01:36.838421
+2018-05-18	15:36:54	  661	  110	     	     	0:07:38.430624
+2018-05-18	17:53:40	  771	  178	     	     	0:19:25.439346
+2018-05-18	20:25:32	  948	    0	     	     	0:03:00.877839
+2018-06-12	23:25:24	  948	   24	     	     	0:09:37.270052
+2018-06-12	23:39:40	  972	    0	     	     	0:01:19.301210
+2018-08-02	13:58:27	  972	   55	     	     	0:07:36.934371

+ 15 - 0
log/scopus-publications-ufo-kit-edu-ipe.log

@@ -0,0 +1,15 @@
+/root/scopus-ipe/scopus_get_publications.py
+
+      Date	    Time	NPubs	NNewP	NCite	NNewC	          TRun
+------------------------------------------------------------------------------
+2018-03-26	10:43:07	    0	    0	    0	    0	0:00:04.129658
+2018-03-26	10:44:58	    0	    0	    0	    0	0:00:04.117248
+2018-03-26	10:48:31	    7	    7	    1	    1	0:00:18.401608
+2018-03-26	10:49:04	    7	    0	    1	    0	0:00:03.431426
+2018-03-26	10:56:50	   80	   73	  158	  157	0:06:00.993327
+2018-03-26	11:20:55	  145	   65	  466	  308	0:11:11.811925
+2018-03-26	11:37:46	  145	    0	  466	    0	0:00:08.930936
+2018-03-27	12:05:16	  219	   74	 1070	  604	0:26:17.795581
+2018-08-02	16:49:58	  232	    0	     	     	0:00:05.493137
+2018-08-02	17:03:36	  232	   19	     	     	0:00:43.624238
+2018-08-02	17:17:27	  251	    8	     	     	0:00:25.399486

+ 45 - 0
log/scopus-publications-ufo-kit-edu.log

@@ -33,3 +33,48 @@ scopus-get-publications.py
 2017-06-26	10:02:37	  123	    1	  474	    5	0:00:38.160844
 2017-07-03	11:57:21	  123	    0	  477	    3	0:00:36.215621
 2017-07-10	11:23:12	  123	    0	  477	    0	0:00:23.060878
+2017-07-17	16:28:08	  123	    0	  477	    0	0:00:19.477841
+2017-07-18	07:04:21	  123	    0	  477	    0	0:00:19.033081
+2017-07-28	12:47:45	  124	    1	  483	    6	0:04:22.762453
+2017-08-03	08:54:13	  127	    3	  491	    8	0:00:54.324282
+2017-09-07	15:58:21	  131	    4	  506	   15	0:01:17.659050
+2017-09-20	12:34:40	  132	    1	  515	    9	0:00:47.592076
+2017-10-04	12:59:36	  132	    0	  525	   10	0:00:58.699665
+2017-10-16	15:45:25	  135	    3	  537	   12	0:01:15.207884
+2017-10-25	09:32:38	  136	    1	  541	    4	0:00:34.457160
+2017-11-20	14:45:35	  139	    3	  557	   16	0:01:11.767109
+2017-11-25	09:02:38	  140	    1	  558	    1	0:00:11.630266
+2017-12-19	12:33:19	  143	    3	  585	   27	0:02:10.142203
+2017-12-19	12:47:48	  143	    0	  585	    0	0:00:10.245296
+2017-12-19	12:49:32	  143	    0	  585	    0	0:00:27.280789
+2018-01-13	16:15:40	  145	    2	  602	   17	0:01:03.821972
+2018-01-30	16:10:36	  147	    2	  620	   18	0:01:13.152029
+2018-02-08	17:47:46	  147	    0	  624	    4	0:00:17.760916
+2018-03-02	07:40:09	  149	    2	  652	   28	0:01:43.300644
+2018-03-20	11:05:30	  148	    0	  660	    8	0:00:58.301810
+2018-03-20	11:06:32	  148	    0	  660	    0	0:00:10.112241
+2018-03-20	11:50:46	  148	    0	  660	    0	0:00:13.964116
+2018-03-26	10:43:38	  148	    0	  663	    3	0:00:18.869935
+2018-04-16	09:41:47	  149	    1	  667	    4	0:00:33.428451
+2018-04-16	09:52:02	  159	   10	  739	   72	0:03:30.819239
+2018-04-19	19:26:00	  160	    1	  745	    6	0:00:54.086920
+2018-04-23	21:40:42	  160	    0	  745	    0	0:00:10.334309
+2018-04-25	12:36:59	  160	    0	  745	    0	0:00:25.924248
+2018-06-11	20:21:22	  160	    8	     	     	0:00:43.208232
+2018-07-11	10:27:20	  168	    3	     	     	0:00:19.377681
+2018-08-02	14:57:31	  171	    4	     	     	0:00:22.646739
+2018-09-13	11:51:45	  175	    2	     	     	0:00:20.734607
+2018-10-24	17:55:23	  177	    3	     	     	0:00:22.608117
+2018-11-18	23:32:23	  183	    2	     	     	0:00:16.469450
+2018-11-19	09:06:09	  183	    2	     	     	0:00:18.915039
+2018-11-19	09:09:53	  185	    0	     	     	0:00:15.178698
+2018-11-19	09:18:01	  183	    2	     	     	0:00:18.858278
+2018-11-19	09:40:11	  183	    2	     	     	0:00:20.500910
+2018-11-19	09:41:27	  183	    2	     	     	0:00:18.911137
+2018-11-19	09:49:00	  183	    2	     	     	0:00:18.318267
+2018-11-19	10:02:09	  183	    2	     	     	0:00:18.999183
+2018-11-19	10:14:01	  183	    2	     	     	0:00:27.101883
+2018-11-19	11:03:17	  185	    2	     	     	0:00:23.241898
+2018-11-19	11:11:30	  187	    0	     	     	0:00:25.172730
+2018-12-20	18:07:13	  187	    4	     	     	0:00:24.450218
+2019-07-12	22:23:41	  191	    9	     	     	0:00:27.399886

+ 33 - 0
rm-scopusid.py

@@ -0,0 +1,33 @@
+# Remove scopus ID from publication database
+#
+
+import sys
+
+# MySQL persistent data (Account: scopus, $scopus$)
+import pymysql.cursors
+import pymysql
+
+from config import *
+
+
+if len(sys.argv) > 1:
+
+    scopusid = sys.argv[1]
+
+    # Connect to the database
+    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
+
+    try:
+        with connection.cursor() as cursor:
+
+            sql = "DELETE FROM publications WHERE scopusid=\"%s\" " % scopusid
+            print sql;
+            cursor.execute(sql)
+            connection.commit()
+
+    finally:
+        connection.close()
+
+
+
+

+ 27 - 19
scopus_get_publications.py

@@ -108,7 +108,8 @@ def update_publications(authids,authname='',scopus_opts = '',max=0):
                     try:
                         catlist = json.loads(cat)
                     except TypeError:
-                        print("No categories upto now")
+			#print("No categories upto now")
+			pass
 
                     if authname not in catlist:
                         catlist += [authname]
@@ -128,9 +129,6 @@ def update_publications(authids,authname='',scopus_opts = '',max=0):
 def update_citations():
     """ Read all citations and store in the citation table """
 
-
-    global npubs
-    
     print ""
     print "=== Update citatation of all publication in the database"
     
@@ -146,10 +144,6 @@ def update_citations():
             sql = "SELECT wpid,eid,citedbycount,citesloaded FROM publications WHERE wpid > 0"
             cursor.execute(sql)
             result = cursor.fetchall()
-          
-            print "Total number of publications is %d" % len(result)
-            npubs = len(result)
-            #print "Npubs = %d" % npubs
 
             for pub in result:
                 wpid = int(pub['wpid'])
@@ -207,7 +201,7 @@ def update_citations():
 def update_wp_posts():
     """ Create wordpress posts for all entries that have none """
 
-
+    global npubs
     global nnewpubs
 
     print ""
@@ -220,6 +214,15 @@ def update_wp_posts():
     # Todo: Shift to a separate script !?
     try:
         with connection.cursor() as cursor:
+            # Read all publication records with wpid > 0
+            sql = "SELECT wpid,eid,citedbycount,citesloaded FROM publications WHERE wpid > 0"
+            cursor.execute(sql)
+            result = cursor.fetchall()
+
+            print "Total number of publications is %d" % len(result)
+            npubs = len(result)
+            #print "Npubs = %d" % npubs
+
             # Count all publications
             #sql = "SELECT COUNT(id) FROM publications"
             #cursor.execute(sql)
@@ -366,15 +369,13 @@ if __name__ == "__main__":
 
     # read all citations
     # Todo: read only new citations?!
+    if sc_citations:
+        update_citations()
 
-    update_citations()
-
-
-    # loop over all cites and post comments to wordpress, when necessary
-    # update database
+        # loop over all cites and post comments to wordpress, when necessary
+        # update database
 
-    update_wp_comments()
-    # Todo: deactivate comments for scopus posts!!!
+        update_wp_comments()
 
 
     # Display summary
@@ -384,8 +385,9 @@ if __name__ == "__main__":
     print "Date       = " + str(start)
     print "NPubs      = " + str(npubs)
     print "NNewPubs   = " + str(nnewpubs)
-    print "NCites     = " + str(ncites)
-    print "NNewCites  = " + str(nnewcites)
+    if sc_citations:
+        print "NCites     = " + str(ncites)
+        print "NNewCites  = " + str(nnewcites)
     print "Runtime    = " + str(end - start)
 
 
@@ -402,9 +404,15 @@ if __name__ == "__main__":
 
 
     log = open(log_file,"a")
-    log.write("%s\t%s\t%5d\t%5d\t%5d\t%5d\t%s\n" % (start.strftime("%Y-%m-%d"),
+    if sc_citations:
+        log.write("%s\t%s\t%5d\t%5d\t%5d\t%5d\t%s\n" % (start.strftime("%Y-%m-%d"),
                                         start.strftime("%H:%M:%S"),
                                         npubs,nnewpubs,ncites,nnewcites,str(end-start)))
+    else:
+        log.write("%s\t%s\t%5d\t%5d\t%5s\t%5s\t%s\n" % (start.strftime("%Y-%m-%d"),
+                                        start.strftime("%H:%M:%S"),
+                                        npubs,nnewpubs,"","",str(end-start)))
+
     log.close()
 
     # done

+ 31 - 0
test-citations.py

@@ -0,0 +1,31 @@
+import requests
+import json
+from pprint import pprint
+
+from config import *
+
+#
+# Script to check all the affiliations of the authors
+#
+
+SCOPUS_ID = "SCOPUS_ID:85039766090"
+EID = "2-s2.0-84946782439" 
+
+#url = "https://api.elsevier.com/content/search/scopus?query=refeid(" + EID + ")" 
+#url = "https://api.elsevier.com/content/abstract/citations/scopus_id/" + SCOPUS_ID 
+url = "http://api.elsevier.com/content/abstract/scopus_id/" + SCOPUS_ID
+
+
+resp = requests.get(url,headers={'Accept':'application/json','X-ELS-APIKey':DTS_API_KEY})
+
+results = resp.json()
+pprint(results)
+
+
+
+
+
+
+
+
+

+ 62 - 0
test-citations2.py

@@ -0,0 +1,62 @@
+import requests
+import json
+from pprint import pprint
+
+from config import *
+
+#
+# Script to check all the affiliations of the authors
+#
+
+EID = "2-s2.0-84946782439" 
+
+url = "https://api.elsevier.com/content/search/scopus?query=refeid(" + EID + ")" 
+
+resp = requests.get(url,headers={'Accept':'application/json','X-ELS-APIKey':MY_API_KEY})
+
+results = resp.json()
+pprint(results)
+
+exit()
+
+
+
+count = 25
+n = 0
+npubstoget = 25
+start = 0
+ntotal = 0
+publist = []
+
+while (npubstoget > 0):
+
+        loopargs = "&count=%d&start=%d" % (count, start)
+        #print loopargs
+
+        url = ("https://api.elsevier.com/content/search/scopus?query=refeid("
+               + EID + ")" + loopargs)
+
+        print "URL: " + url
+        resp = requests.get(url,headers={'Accept':'application/json','X-ELS-APIKey':MY_API_KEY})
+
+        results = resp.json()
+        pprint(results)
+        #print json.dumps(results,sort_keys=True,indent=4, separators=(',', ': '))
+
+        if (n==0):
+            n = int(results['search-results']['opensearch:totalResults'])
+            #print "Current number citations in scopus = %d" % n
+            npubstoget = n
+
+        if (n>0):
+                publist += results['search-results']['entry']
+
+        npubstoget = npubstoget - count
+        start += count
+
+
+
+
+
+
+

+ 8 - 8
test-scopus.py

@@ -191,11 +191,11 @@ def get_scopus_brief(SCOPUS_ID, max_authors=1000):
 # List of newly cited publications
 #
 
+print get_scopus_info("SCOPUS_ID:84969498463")
 
-
-resp = requests.get("http://api.elsevier.com/content/author?author_id="+ak+"&view=metrics",
-            headers={'Accept':'application/json',
-                             'X-ELS-APIKey': MY_API_KEY})
+#resp = requests.get("http://api.elsevier.com/content/author?author_id="+ak+"&view=metrics",
+#            headers={'Accept':'application/json',
+#                             'X-ELS-APIKey': MY_API_KEY})
 
 #print resp
 
@@ -215,7 +215,7 @@ resp = requests.get("http://api.elsevier.com/content/author?author_id="+ak+"&vie
 #publist = get_scopus_list(ufo_ips, 'PUBYEAR = 2015', 30)
 
 # Exclude authors?
-publist = get_scopus_list(pdv, 'NOT AU-ID(7006284555)', 10)
+#publist = get_scopus_list(pdv, 'NOT AU-ID(7006284555)', 10)
 
 
 # Exclude author - black list !!!
@@ -224,10 +224,10 @@ publist = get_scopus_list(pdv, 'NOT AU-ID(7006284555)', 10)
 #
 # Display the result
 #
-print "Number of publications: %d" % len(publist)
+#print "Number of publications: %d" % len(publist)
 
-for pub in publist:
-    print get_scopus_brief(pub,10000)
+#for pub in publist:
+#    print get_scopus_brief(pub,10000)
 
 
 # Test printing functions

+ 33 - 7
test-wp.py

@@ -9,9 +9,20 @@ from wordpress_xmlrpc import WordPressPost, WordPressComment
 from wordpress_xmlrpc.methods.posts import GetPosts, NewPost, EditPost
 from wordpress_xmlrpc.methods.comments import NewComment, EditComment
 from wordpress_xmlrpc.methods.users import GetUserInfo
+from wordpress_xmlrpc.methods.taxonomies import GetTerms
 
 from config import *
 
+def wordpress_get_category(slug):
+     # Load taxonomy and search for the slug
+     catlist = wp.call(GetTerms('category'))
+
+     for cat in catlist:
+         if cat.slug == slug:
+             return cat
+
+
+
 # Use Wordpress account - not the mysql credentials
 # Todo: use scopus later !!!
 wp = Client(wp_api_url,wp_user,wp_password)
@@ -19,23 +30,38 @@ wp = Client(wp_api_url,wp_user,wp_password)
 
 #print wp.call(GetUserInfo())
 
+# Test access to categories
+categories = []
+cat = wordpress_get_category("sensors")
+
+if cat: 
+    print "Found ", cat.name
+    categories.append(cat)
+
+
 # Todo: Set the date of the post according to the scopus date
 
 post = WordPressPost()
 post.title = 'My post 7' # put title of the publication here
 post.slug = 'DOIxxxxx7' # set the name of the post different to the title
 post.content = 'This is a more complete example post about XML-RPC (but still not comlete enough)'
-post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+#post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+
+post.terms = categories
 
 post.terms_names = {
-    # 'post_tag': ['test', 'firstpost'], # what's that? I don't use it currently
-    'category': ['Publications', 'Reports'] # defined in WP + python script
+    #'post_tag': ['test', 'firstpost'], # what's that? I don't use it currently
+    'category': ['Publications', 'asics'] # defined in WP + python script
 }
 
 # whoops, I forgot to publish it!
 post.post_status = 'publish' # alternative is draft here !
 post.comment_status = 'open' # allow comments - may be only for scopus
-wp.call(EditPost(post.id, post))# Update the before created post
+post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+
+#wp.call(EditPost(post.id, post))# Update the before created post
+
+print "Created Wordpress post ", post.id
 
 # Todo:
 # Save the id in the publication table, together with the scopus id
@@ -50,10 +76,10 @@ wp.call(EditPost(post.id, post))# Update the before created post
 # or my biotech items???
 #
 
-comment = WordPressComment()
-comment.content = 'Hi, thats cool - we can also add our comments automatically'
+#comment = WordPressComment()
+#comment.content = 'Hi, thats cool - we can also add our comments automatically'
 
-comment.id = wp.call(NewComment(post.id, comment))
+#comment.id = wp.call(NewComment(post.id, comment))
 
 
 

+ 2 - 2
test-wp2.py

@@ -30,10 +30,10 @@ if len(sys.argv) > 1:
 # Read post
 try:
     post = wp.call(GetPost(wpid))
-    print "Post %d: %s" %(wpid,post.title)
+    print ("Post %d: %s" %(wpid,post.title))
 
 except:
-    print "Post %d seems to be not available" % wpid
+    print ("Post %d seems to be not available" % wpid)
 
 
 

+ 2 - 1
update.sh

@@ -1,6 +1,7 @@
+#!/bin/bash
 # Update publications on UFO webpage by new data in Scopus
 # A Kopmann, 11.4.2017
 #
 
-python -W ignore /root/scopus/scopus_get_publications.py
+python -W ignore "`pwd`/scopus_get_publications.py"