
Initial version

Andreas Kopmann, 7 years ago
commit e58e305e1d
7 changed files with 1398 additions and 0 deletions
  1. README.txt  +368 -0
  2. ak_scopus.py  +265 -0
  3. ak_wordpress.py  +191 -0
  4. create_scopus.sql  +54 -0
  5. my_scopus.py  +101 -0
  6. scopus-get-publications.py  +413 -0
  7. update.sh  +6 -0

+ 368 - 0
README.txt

@@ -0,0 +1,368 @@
+README scopus
+Ak, 27.3.2017
+
+Get information from Scopus database.
+
+These queries work only with access to Scopus (e.g. from the KIT LAN);
+the Scopus service is not publicly available.
+
+
+Content
+info		Documentation, website, etc
+
+readme.txt	This file
+my_scopus.py	List of scopus author ids
+ak_scopus.py	Functions to access scopus
+ak_wordpress.py Functions to create WordPress posts + comments
+scopus-get-publications.py Script to query Scopus
+
+test-scopus.py	Application with some functions to get publication entries
+		Prints a list with some formatting
+test-scopus2.py Example from one of the websites; only one query
+
+
+Usage: 
+1. Go to Scopus and retrieve the Scopus author ids for the scientists in your
+group. Define the ids in my_scopus.py and group them, for example:
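+
+(Shortened excerpt from my_scopus.py - one variable per author id,
+grouped into lists:)
+
+ak = "35313939900"
+csa = "15076530600"
+matthiasVogelgesang = "35303862100"
+
+ufo_pdv = [ak, csa, matthiasVogelgesang]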
+
+2. Select one or more author groups in scopus-get-publications.py (main part at
+the end of the file). Check the definitions of the database and the wordpress
+installation.
+
+3. Execute scopus-get-publications.py.
+python -W ignore scopus-get-publications.py
+
+Note: The -W ignore flag might be necessary if the INSERT IGNORE causes warnings.
+
+Example run:
+ufo:~/scopus # python -W ignore scopus-get-publications.py 
+
+***********************************************
+**** scopus-get-publications / 2017-03-27 *****
+***********************************************
+
+=== Update of publications for the author group: Computing
+Total number of publications: 54
+=== Update of publications for the author group: X-ray Imaging
+Total number of publications: 39
+=== Update of publications for the author group: Electronics
+Total number of publications: 132
+=== Update of publications for the author group: Morphology
+Total number of publications: 21
+
+=== Create posts for newly registered publications in scopus
+Nothing new found
+
+=== Update citations of all publications in the database
+Total number of publications is 281
+
+=== Create comments for newly registered citations in scopus
+Number of new citations is 0
+
+Summary: (see also logfile /root/scopus/scopus-publications.log) 
+Date       = 2017-03-27 21:28:36.002624
+NPubs      = 281
+NNewPubs   = 0
+NCites     = 4699
+NNewCites  = 0
+Runtime    = 0:00:11.496362
+
+
+Further enhancements
+
+Todo:
+- Reprocessing of all posts, if the format has changed,
+e.g. a button with an email link to the author has been added
+or a new category has been added
+- Query only the latest citations for each publication, not all of them
+- Store the JSON data of all publications
+- Get bibliographic information for display on the web page of a research group
+like UFO, or maybe also later for the DTS program
+- Handle wrong publications in scopus for authors with the same name
+- Automatically include reports and student theses by
+bibtex definition and upload on a server!?
+-> Would have the nice effect that all student work is organized systematically!!!
+
+
+
+Structure of the database
+
+Both tables keep the references to the publications in Scopus as well as the
+WordPress ids. With this information, reprocessing is possible (but not
+implemented yet); see the sketch below.
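+
+A minimal sketch of such a reprocessing lookup, assuming the pymysql setup
+from scopus-get-publications.py (the helper itself is hypothetical, not part
+of this repository):
+
+import pymysql.cursors
+
+def get_wordpress_id(scopusid):
+    # look up the wordpress post id for a scopus publication
+    connection = pymysql.connect(host='localhost', user='scopus',
+            password='$scopus$', db='scopus', charset='utf8mb4',
+            cursorclass=pymysql.cursors.DictCursor)
+    try:
+        with connection.cursor() as cursor:
+            sql = "SELECT wpid FROM publications WHERE scopusid = %s"
+            cursor.execute(sql, (scopusid,))
+            result = cursor.fetchall()
+    finally:
+        connection.close()
+    if len(result) > 0:
+        return result[0]['wpid']
+    return None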
+
+
+Table publications:
+
+MariaDB [scopus]> describe publications;
++--------------+--------------+------+-----+---------+----------------+
+| Field        | Type         | Null | Key | Default | Extra          |
++--------------+--------------+------+-----+---------+----------------+
+| id           | int(11)      | NO   | PRI | NULL    | auto_increment |
+| scopusid     | varchar(255) | YES  | UNI | NULL    |                |
+| wpid         | int(11)      | YES  |     | NULL    |                |
+| citedbycount | int(11)      | YES  |     | NULL    |                |
+| citesloaded  | int(11)      | YES  |     | NULL    |                |
+| categories   | varchar(255) | YES  |     | NULL    |                |
+| doi          | varchar(255) | YES  |     | NULL    |                |
+| title        | varchar(255) | YES  |     | NULL    |                |
+| abstract     | text         | YES  |     | NULL    |                |
+| bibtex       | text         | YES  |     | NULL    |                |
+| ts           | datetime     | YES  |     | NULL    |                |
+| scopusdata   | text         | YES  |     | NULL    |                |
+| eid          | varchar(255) | YES  |     | NULL    |                |
++--------------+--------------+------+-----+---------+----------------+
+
+Table citations:
+
+MariaDB [scopus]> describe citations;
++--------------+--------------+------+-----+---------+----------------+
+| Field        | Type         | Null | Key | Default | Extra          |
++--------------+--------------+------+-----+---------+----------------+
+| id           | int(11)      | NO   | PRI | NULL    | auto_increment |
+| scopusid     | varchar(255) | YES  |     | NULL    |                |
+| eid          | varchar(255) | YES  |     | NULL    |                |
+| wpid         | int(11)      | YES  | MUL | NULL    |                |
+| wpcommentid  | int(11)      | YES  |     | NULL    |                |
+| citedbycount | int(11)      | YES  |     | NULL    |                |
+| citesloaded  | int(11)      | YES  |     | NULL    |                |
+| categories   | varchar(255) | YES  |     | NULL    |                |
+| doi          | varchar(255) | YES  |     | NULL    |                |
+| scopusdata   | text         | YES  |     | NULL    |                |
+| title        | varchar(255) | YES  |     | NULL    |                |
+| abstract     | text         | YES  |     | NULL    |                |
+| bibtex       | text         | YES  |     | NULL    |                |
+| ts           | datetime     | YES  |     | NULL    |                |
++--------------+--------------+------+-----+---------+----------------+
+
+
+
+Setup of the scopus database in mysql
+
+create database scopus;
+
+CREATE USER 'scopus'@'localhost' IDENTIFIED BY '$scopus$';
+GRANT ALL ON scopus.* TO 'scopus'@'localhost';
+
+# create tables
+mysql -u scopus -p scopus < create_scopus.sql
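+
+A quick way to verify the database access from Python (assuming pymysql is
+installed as described below):
+
+import pymysql
+
+connection = pymysql.connect(host='localhost', user='scopus',
+        password='$scopus$', db='scopus')
+try:
+    with connection.cursor() as cursor:
+        cursor.execute("SHOW TABLES")
+        print cursor.fetchall()
+finally:
+    connection.close()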
+
+
+
+Publications in Scopus:
+
+Sometimes (unfortunately quite often) an author id in Scopus is not unique but
+identifies several researchers with the same name, e.g. Michele Caselle (3 persons)
+or Matthias Balzer (2).
+
+This case is currently handled manually by deleting all publications from the
+unknown authors. It might also be possible to implement a blacklist?? See the
+sketch below.
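+
+A possible blacklist filter (only a sketch - the publication id below is a
+placeholder, and such a filter is not implemented in this repository):
+
+# hypothetical blacklist of publication scopus ids that belong to
+# the foreign authors with the same name
+PUB_BLACKLIST = ["SCOPUS_ID:0000000000"]
+
+def filter_blacklisted(publist):
+    # publist entries are [scopusid, eid, citedbycount],
+    # as returned by get_scopus_list()
+    return [pub for pub in publist if pub[0] not in PUB_BLACKLIST]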
+
+
+Sample data from Scopus:
+{
+    "abstracts-retrieval-response": {
+        "authors": {
+            "author": [
+                {
+                    "@_fa": "true",
+                    "@auid": "15076530600",
+                    "@seq": "1",
+                    "affiliation": {
+                        "@href": "http://api.elsevier.com/content/affiliation/affiliation_id/60102538",
+                        "@id": "60102538"
+                    },
+                    "author-url": "http://api.elsevier.com/content/author/author_id/15076530600",
+                    "ce:given-name": "Suren",
+                    "ce:indexed-name": "Chilingaryan S.",
+                    "ce:initials": "S.",
+                    "ce:surname": "Chilingaryan",
+                    "preferred-name": {
+                        "ce:given-name": "Suren",
+                        "ce:indexed-name": "Chilingaryan S.",
+                        "ce:initials": "S.",
+                        "ce:surname": "Chilingaryan"
+                    }
+                },
+                {
+                    "@_fa": "true",
+                    "@auid": "35313939900",
+                    "@seq": "2",
+                    "affiliation": {
+                        "@href": "http://api.elsevier.com/content/affiliation/affiliation_id/60102538",
+                        "@id": "60102538"
+                    },
+                    "author-url": "http://api.elsevier.com/content/author/author_id/35313939900",
+                    "ce:given-name": "Andreas",
+                    "ce:indexed-name": "Kopmann A.",
+                    "ce:initials": "A.",
+                    "ce:surname": "Kopmann",
+                    "preferred-name": {
+                        "ce:given-name": "Andreas",
+                        "ce:indexed-name": "Kopmann A.",
+                        "ce:initials": "A.",
+                        "ce:surname": "Kopmann"
+                    }
+                },
+                {
+                    "@_fa": "true",
+                    "@auid": "56001075000",
+                    "@seq": "3",
+                    "affiliation": {
+                        "@href": "http://api.elsevier.com/content/affiliation/affiliation_id/60032633",
+                        "@id": "60032633"
+                    },
+                    "author-url": "http://api.elsevier.com/content/author/author_id/56001075000",
+                    "ce:given-name": "Alessandro",
+                    "ce:indexed-name": "Mirone A.",
+                    "ce:initials": "A.",
+                    "ce:surname": "Mirone",
+                    "preferred-name": {
+                        "ce:given-name": "Alessandro",
+                        "ce:indexed-name": "Mirone A.",
+                        "ce:initials": "A.",
+                        "ce:surname": "Mirone"
+                    }
+                },
+                {
+                    "@_fa": "true",
+                    "@auid": "35277157300",
+                    "@seq": "4",
+                    "affiliation": {
+                        "@href": "http://api.elsevier.com/content/affiliation/affiliation_id/60102538",
+                        "@id": "60102538"
+                    },
+                    "author-url": "http://api.elsevier.com/content/author/author_id/35277157300",
+                    "ce:given-name": "Tomy",
+                    "ce:indexed-name": "Dos Santos Rolo T.",
+                    "ce:initials": "T.",
+                    "ce:surname": "Dos Santos Rolo",
+                    "preferred-name": {
+                        "ce:given-name": "Tomy",
+                        "ce:indexed-name": "Dos Santos Rolo T.",
+                        "ce:initials": "T.",
+                        "ce:surname": "Dos Santos Rolo"
+                    }
+                },
+                {
+                    "@_fa": "true",
+                    "@auid": "35303862100",
+                    "@seq": "5",
+                    "affiliation": {
+                        "@href": "http://api.elsevier.com/content/affiliation/affiliation_id/60102538",
+                        "@id": "60102538"
+                    },
+                    "author-url": "http://api.elsevier.com/content/author/author_id/35303862100",
+                    "ce:given-name": "Matthias",
+                    "ce:indexed-name": "Vogelgesang M.",
+                    "ce:initials": "M.",
+                    "ce:surname": "Vogelgesang",
+                    "preferred-name": {
+                        "ce:given-name": "Matthias",
+                        "ce:indexed-name": "Vogelgesang M.",
+                        "ce:initials": "M.",
+                        "ce:surname": "Vogelgesang"
+                    }
+                }
+            ]
+        },
+        "coredata": {
+            "citedby-count": "0",
+            "dc:description": "X-ray tomography has been proven to be a valuable tool for understanding internal, otherwise invisible, mechanisms in biology and other fields. Recent advances in digital detector technology enabled investigation of dynamic processes in 3D with a temporal resolution down to the milliseconds range. Unfortunately it requires computationally intensive recon- struction algorithms with long post-processing times. We have optimized the reconstruction software employed at the micro-tomography beamlines at KIT and ESRF. Using a 4 stage pipelined architecture and the computational power of modern graphic cards, we were able to reduce the processing time by a factor 75 with a single server. The time required to reconstruct a typical 3D image is reduced down to several seconds only and online visualization is possible for the first time.Copyright is held by the author/owner(s).",
+            "dc:identifier": "SCOPUS_ID:84859045029",
+            "dc:title": "Poster: A GPU-based architecture for real-time data assessment at synchrotron experiments",
+            "link": [
+                {
+                    "@_fa": "true",
+                    "@href": "http://api.elsevier.com/content/abstract/scopus_id/84859045029",
+                    "@rel": "self"
+                }
+            ],
+            "prism:aggregationType": "Conference Proceeding",
+            "prism:coverDate": "2011-12-01",
+            "prism:doi": "10.1145/2148600.2148624",
+            "prism:pageRange": "51-52",
+            "prism:publicationName": "SC'11 - Proceedings of the 2011 High Performance Computing Networking, Storage and Analysis Companion, Co-located with SC'11",
+            "prism:url": "http://api.elsevier.com/content/abstract/scopus_id/84859045029"
+        }
+    }
+}
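+
+The relevant fields can be picked from this structure as in the following
+sketch (data is assumed to hold the parsed JSON shown above):
+
+authors = data['abstracts-retrieval-response']['authors']['author']
+coredata = data['abstracts-retrieval-response']['coredata']
+
+names = ', '.join([au['ce:indexed-name'] for au in authors])
+print names + ': ' + coredata['dc:title']
+print 'doi:' + coredata['prism:doi'] + ' (cited ' + coredata['citedby-count'] + ' times)'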
+
+
+
+Installation of python, mysql et al:
+
+pip install python-wordpress-xmlrpc
+
+Web server configuration (apparently has to be redone after every installation!!!)
+
+/etc/apache2/httpd.conf:
+LoadModule userdir_module libexec/apache2/mod_userdir.so
+LoadModule php5_module libexec/apache2/libphp5.so
+Include /private/etc/apache2/extra/httpd-userdir.conf
+
+/etc/apache2/extra/httpd-userdir.conf:
+Include /private/etc/apache2/users/*.conf
+
+/etc/php.ini:
+pdo_mysql.default_socket= /tmp/mysql.sock
+mysql.default_socket = /tmp/mysql.sock
+mysqli.default_socket = /tmp/mysql.sock
+
+
+sh-3.2# apachectl restart
+
+
+Install website:
+
+Create an archive with the WP Duplicator plugin
+
+Save scopus database
+
+mysqldump -u scopus -p scopus > scopus-170322.sql
+
+
+Create database on remote system
+
+mysql:
+CREATE USER 'scopus'@'localhost' IDENTIFIED BY '$scopus$';
+GRANT ALL PRIVILEGES ON scopus.* TO 'scopus'@'localhost';
+
+CREATE DATABASE scopus;
+
+mysql -u scopus -p scopus < scopus-170322.sql
+
+
+Create database wp_ufo2:
+
+CREATE USER 'ufo'@'localhost' IDENTIFIED BY '$ipepdv$';
+GRANT ALL PRIVILEGES ON wp_ufo2.* TO 'ufo'@'localhost';
+
+CREATE DATABASE wp_ufo2;
+
+
+Import WP archive:
+mkdir ufo2
+chown -R wwwrun:www ufo2
+
+http://ufo.kit.edu/ufo2/installer.php
+
+
+Error: PHP module ZipArchive is missing 
+Manual extraction is available in the advanced options !!!
+
+
+Installation Scopus-Scripts:
+
+pip install requests
+pip install python-wordpress-xmlrpc
+pip install pymysql
+
+Check configurations:
+scopus-get-publications.py
+ak_wordpress.py
+
+
+
+
+
+

+ 265 - 0
ak_scopus.py

@@ -0,0 +1,265 @@
+import requests
+import json
+from my_scopus import MY_API_KEY
+
+
+#
+# Get the last N publications of a given author list
+#
+# Arguments: list of scopus author ids (e.g. "35313939900")
+# Returns: list of scopus ids, electronic id and number of citations
+# DOI would be desirable but is not available in some records
+# e.g. ('SCOPUS_ID:0037368024',)
+# The result can be used in the display functions like get_scopus_brief()
+#
+def get_scopus_list(author_list, opt='', max=0):
+    
+    count = 25 # number of publications requested in one call
+
+
+    if isinstance(author_list, list):
+        #print "Length of author list %d" % len(author_list)
+        query = ' OR '.join(['AU-ID('+au+')' for au in author_list])
+    else:
+        query = 'AU-ID('+author_list+')'
+
+    if len(opt) > 0:
+        query = query + " AND " + opt
+    
+# The scopus query limits the number of results per request.
+# To get all results, a loop over the result pages is required.
+# The loop is controlled by count and start.
+
+# The results can be limited by time via the opt argument,
+# e.g. PUBYEAR AFT 2010
+
+    if (max > 0) and (max < count):
+        npubstoget = max
+        count = max
+    else:
+        npubstoget = count
+
+    n = 0
+    start = 0
+    ntotal = 0
+    publist = []
+    while (npubstoget > 0):
+        
+        loopargs = "&count=%d&start=%d" % (count, start)
+        #print loopargs
+
+        #print "Query: " + query
+        url = ("http://api.elsevier.com/content/search/scopus?query="
+               +query+ "&field=dc:identifier,citedby-count,eid" + loopargs)
+        #print "URL: " + url
+        resp = requests.get(url,
+                headers={'Accept':'application/json',
+                        'X-ELS-APIKey': MY_API_KEY})
+        #print resp
+        results = resp.json()
+
+
+        if (n==0):
+            n = int(results['search-results']['opensearch:totalResults'])
+            #print "Number publications found = %d" % n
+            if (max == 0):
+                npubstoget = n
+
+        #print json.dumps(resp.json(),
+        #             sort_keys=True,
+        #             indent=4, separators=(',', ': '))
+
+        newpubs = []
+        for r in results['search-results']["entry"]:
+            #print r
+            try:
+                newpubs += [[ str(r['dc:identifier']),str(r['eid']),str(r['citedby-count'])]]
+            except KeyError:
+                print "Warning: There is data missing"
+                print r
+
+# Todo : DOI is not always available !!!
+
+
+        #newpubs = [[str(r['dc:identifier']),str(r['citedby-count'])] for r in results['search-results']["entry"]]
+        #citations = [[str(r['citedby-count'])] for r in results['search-results']["entry"]]
+
+        publist += newpubs
+
+# Todo: Counting by the list is dangerous - if an element is missing !!!
+
+        nreceived = len(newpubs)
+        nlist = len(publist)
+        #print "Received: %d" %nreceived
+        #print "In list= %d" %nlist
+
+        # Next iteration
+        #print "Calculating loop parameters npubstoget = %d (max = %d)" % (npubstoget, max)
+        npubstoget = npubstoget - count
+        start += count
+
+    return publist
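+
+# Usage sketch (author groups as defined in my_scopus.py; this mirrors
+# the calls in scopus-get-publications.py):
+#   from my_scopus import ufo_pdv
+#   publist = get_scopus_list(ufo_pdv, '(PUBYEAR AFT 2006)')
+#   for scopusid, eid, citedby in publist:
+#       print scopusid, eid, citedby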
+
+
+def get_scopus_data(SCOPUS_ID):
+    url = ("http://api.elsevier.com/content/abstract/scopus_id/"
+           + SCOPUS_ID
+           + "?field=article-number,title,publicationName,volume,issueIdentifier,"
+           + "prism:pageRange,coverDate,article-number,eid,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn")
+    #print url
+    resp = requests.get(url,
+            headers={'Accept':'application/json',
+            'X-ELS-APIKey': MY_API_KEY})
+            
+    #results = json.loads(resp.text.encode('utf-8'))
+    results = resp.json()
+    return results
+
+
+
+def get_scopus_refs(EID):
+    # Loop over the result pages, in case there are more than 25 citations
+    #
+
+
+    count = 25
+    n = 0
+    npubstoget = 25
+    start = 0
+    ntotal = 0
+    publist = []
+
+    while (npubstoget > 0):
+    
+        loopargs = "&count=%d&start=%d" % (count, start)
+        #print loopargs
+
+        url = ("https://api.elsevier.com/content/search/scopus?query=refeid("
+               + EID + ")" + loopargs)
+
+        #print "URL: " + url
+        resp = requests.get(url,headers={'Accept':'application/json','X-ELS-APIKey':MY_API_KEY})
+
+        results = resp.json()
+        #print json.dumps(results,sort_keys=True,indent=4, separators=(',', ': '))
+
+        if (n==0):
+            n = int(results['search-results']['opensearch:totalResults'])
+            #print "Current number citations in scopus = %d" % n
+            npubstoget = n
+
+        if (n>0):
+                publist += results['search-results']['entry']
+
+        npubstoget = npubstoget - count
+        start += count
+
+
+    return publist
+
+
+
+def get_scopus_info(SCOPUS_ID):
+    url = ("http://api.elsevier.com/content/abstract/scopus_id/"
+           + SCOPUS_ID
+           + "?field=article-number,title,publicationName,volume,issueIdentifier,"
+           + "prism:pageRange,coverDate,article-number,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn")
+    #print url
+    resp = requests.get(url,
+                headers={'Accept':'application/json',
+                        'X-ELS-APIKey': MY_API_KEY})
+    results = json.loads(resp.text.encode('utf-8'))
+    #print resp
+    #print results
+    
+    fstring = '{authors}, {title}, {journal}, {volume}, {articlenum}, ({date}). {doi} (cited {cites} times).\n{abstract}\n\n'
+
+    return fstring.format(authors=', '.join([au['ce:indexed-name'] for au in results['abstracts-retrieval-response']['authors']['author']]),
+                                title=results['abstracts-retrieval-response']['coredata']['dc:title'].encode('utf-8'),
+                                 journal=results['abstracts-retrieval-response']['coredata']['prism:publicationName'].encode('utf-8'),
+                                 volume=results['abstracts-retrieval-response']['coredata']['prism:volume'].encode('utf-8'),
+                                 articlenum=(results['abstracts-retrieval-response']['coredata'].get('prism:pageRange') or
+                                             results['abstracts-retrieval-response']['coredata'].get('article-number')).encode('utf-8'),
+                                 date=results['abstracts-retrieval-response']['coredata']['prism:coverDate'].encode('utf-8'),
+                                 doi='doi:' + results['abstracts-retrieval-response']['coredata']['prism:doi'].encode('utf-8'),
+                                 cites=int(results['abstracts-retrieval-response']['coredata']['citedby-count'].encode('utf-8')),
+                                 abstract=results['abstracts-retrieval-response']['coredata']['dc:description'].encode('utf-8'))
+
+
+#
+# Display a list of publications in plain text format
+#
+# Argument: scopus id of the publication
+#
+# Todo: Implement other formats (e.g. html, bibtex)
+#   Format publications as articles, Title, Abstract
+#
+def get_scopus_brief(SCOPUS_ID, max_authors=1000):
+    id = SCOPUS_ID
+    if isinstance(id, list):
+        id = id[0]
+        
+    url = ("http://api.elsevier.com/content/abstract/scopus_id/"
+           + id
+           + "?field=authors,article-number,title,publicationName,volume,issueIdentifier,"
+           + "prism:pageRange,coverDate,article-number,doi,citedby-count,prism:aggregationType,url,identifier,description,prism:issn")
+
+    #print url
+    resp = requests.get(url,
+                headers={'Accept':'application/json',
+                            'X-ELS-APIKey': MY_API_KEY})
+    results = json.loads(resp.text.encode('utf-8'))
+    #print resp
+    #print results
+    
+    coredata = results['abstracts-retrieval-response']['coredata']
+    
+    pub = ''
+    authors = results['abstracts-retrieval-response']['authors']['author']
+    #print "Number of authors: %d" %len(authors)
+
+    if len(authors) > max_authors:
+        return ''
+
+    if len(authors) > 20:
+        pub = pub + authors[0]['ce:indexed-name'] + ' et al.: '
+    else:
+        pub = ', '.join([au['ce:indexed-name'] for au in authors]) + ': '
+
+    try:
+        if coredata.get('dc:title'):
+            pub = pub + coredata.get('dc:title').encode('utf-8')
+    except ValueError:
+        print "!!! Error encoding title of publication !!!"
+        #print coredata.get('dc:title')
+        pub = pub + coredata.get('dc:title')
+
+    if coredata.get('prism:publicationName'):
+        pub = pub + ', ' + coredata.get('prism:publicationName').encode('utf-8')
+
+    if coredata.get('prism:volume'):
+        pub = pub + ', ' + coredata.get('prism:volume').encode('utf-8')
+
+    if coredata.get('prism:issueIdentifier'):
+        pub = pub + ', ' + coredata.get('prism:issueIdentifier').encode('utf-8')
+    
+    if coredata.get('prism:coverDate'):
+        pub = pub + ' (' + coredata.get('prism:coverDate').encode('utf-8') + ') '
+
+    if coredata.get('prism:pageRange'):
+        pub = pub + coredata.get('prism:pageRange').encode('utf-8')
+    elif coredata.get('article-number'):
+        pub = pub + coredata.get('article-number').encode('utf-8')
+
+    if coredata.get('prism:doi'):
+        pub = pub + ', doi:' + coredata.get('prism:doi').encode('utf-8')
+
+    if coredata.get('citedby-count'):
+        pub = pub + ' (cited ' + coredata.get('citedby-count').encode('utf-8') + ' times)'
+
+    pub = pub + '.\n'
+
+
+    return pub
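+
+# Example (a sketch; run manually with a valid scopus id, e.g. the one
+# from the sample record in README.txt):
+#   print get_scopus_brief('SCOPUS_ID:84859045029')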
+
+

+ 191 - 0
ak_wordpress.py

@@ -0,0 +1,191 @@
+# Create posts via wordpress API
+# A. Kopmann 6.2.2017 (ak)
+#
+
+from datetime import datetime
+import json
+
+from wordpress_xmlrpc import Client
+from wordpress_xmlrpc import WordPressPost, WordPressComment
+from wordpress_xmlrpc.methods.posts import GetPost, NewPost, EditPost
+from wordpress_xmlrpc.methods.comments import NewComment, EditComment
+
+
+# Use the WordPress account - not the mysql credentials
+wp = Client('http://localhost/~kopmann/ufo2/xmlrpc.php', 'scopus', '$scopus$')
+
+
+#
+# create a post from a scopus query
+#
+def wordpress_post_by_scopus(data, category = []):
+    
+    coredata = data['abstracts-retrieval-response']['coredata']
+    try:
+        authors = data['abstracts-retrieval-response']['authors']['author']
+    except KeyError:
+        print "Have not found authors in dataset"
+        print " -> Is the connection to scopus broken???"
+        exit()
+
+    # decode date
+    tsstring = coredata['prism:coverDate'].encode('utf-8')
+    ts = datetime.strptime(tsstring, "%Y-%m-%d").timetuple()
+    year = ts.tm_year
+
+    # Display cover date and title
+    print("%s  -- %s" % (tsstring, coredata['dc:title']))
+
+
+    # define post structure
+    post = WordPressPost()
+    post.title = coredata['dc:title'].encode('utf-8')
+    post.date = ts
+
+    # set the name of the post different to the title
+    post.slug = coredata['dc:identifier'].encode('utf-8')
+    
+    post.excerpt = authors[0]['ce:indexed-name'].encode('utf-8')
+    if len(authors) > 2:
+        post.excerpt += " et al."
+    elif len(authors) == 2:
+        post.excerpt += u', ' + authors[1]['ce:indexed-name'].encode('utf-8')
+    post.excerpt += u', in <em>' + coredata['prism:publicationName'].encode('utf-8') + u'</em>'
+    if 'prism:volume' in coredata:
+        post.excerpt += u', ' + coredata['prism:volume'].encode('utf-8')
+    post.excerpt += u' (' + str(year).encode('utf-8') + u')'
+    if 'prism:pageRange' in coredata:
+        post.excerpt += u' ' + coredata['prism:pageRange'].encode('utf-8')
+    if 'article-number' in coredata:
+        post.excerpt += u', ' + coredata['article-number'].encode('utf-8')
+    post.excerpt += u'.'
+
+    post.content = u'<p>' + authors[0]['ce:indexed-name'].encode('utf-8')
+    authors.pop(0)
+    if len(authors) > 20:
+        post.content += " et al."
+    else:
+        for author in authors:
+            post.content += u', ' + author['ce:indexed-name'].encode('utf-8')
+    post.content += u'</p>'
+    post.content += u'<p>in <em>' + coredata['prism:publicationName'].encode('utf-8') + u'</em>'
+    if 'prism:volume' in coredata:
+        post.content += u', ' + coredata['prism:volume'].encode('utf-8')
+    post.content += u' (' + str(year).encode('utf-8') + u')'
+    if 'prism:pageRange' in coredata:
+        post.content += u' ' + coredata['prism:pageRange'].encode('utf-8')
+    if 'article-number' in coredata:
+        post.content += u', ' + coredata['article-number'].encode('utf-8')
+    post.content += u'.'
+    if 'prism:doi' in coredata:
+        post.content += u' DOI:' + coredata['prism:doi'].encode('utf-8')
+    post.content += u'</p>\n\n'
+    if 'dc:description' in coredata:
+        post.content += u'<div class="accordion-inner"><h4>Abstract</h4>' + coredata['dc:description']
+        if 'authkeywords' in coredata:
+            post.content += u'\n<b>Keywords:</b> ' + coredata['authkeywords'].encode('utf-8')
+        post.content += u'</div>'
+    if 'prism:doi' in coredata:
+        link = u'http://dx.doi.org/' + coredata['prism:doi'].encode('utf-8')
+        post.content += u'\n\n<div class="accordion-inner"><a class="btn btn-primary" href="' + link + u'"><i class="icon-download icon-white"></i> Get it</a></div>'
+
+    #print post.content
+
+    post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+
+
+    if category == '':
+        catlist = ['Publications']
+    else:
+        catlist = ['Publications'] + category
+    post.terms_names = {
+            'category': catlist # defined in WP + python script
+        }
+
+    # Publish the post (it was created as a draft above)
+    post.post_status = 'publish' # alternative is draft here !
+    post.comment_status = 'closed' # comments are opened only while adding citations
+    wp.call(EditPost(post.id, post))# Update the before created post
+
+    # need to update the database !!!
+    return post.id
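+
+# Usage sketch (data as returned by ak_scopus.get_scopus_data; this mirrors
+# update_wp_posts() in scopus-get-publications.py):
+#   from ak_scopus import get_scopus_data
+#   data = get_scopus_data('SCOPUS_ID:84859045029')
+#   wpid = wordpress_post_by_scopus(data, ['Computing'])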
+
+
+#
+# Create comments for all citations
+#
+
+def wordpress_comment_by_scopus(wpid, data):
+
+    #print "Create Wordpress comment for post %d" % wpid
+
+    #print json.dumps(data,sort_keys=True,indent=4, separators=(',', ': '))
+
+    # decode date
+    tsstring = data['prism:coverDate'].encode('utf-8')
+    ts = datetime.strptime(tsstring, "%Y-%m-%d").timetuple()
+    year = ts.tm_year
+
+    # Display cover date and title
+    print("%s  -- %s" % (tsstring, data['dc:title']))
+
+
+    # Create WP comment
+    # define post structure
+    comment = WordPressComment()
+    comment.id = 0
+    comment.content = ""
+    
+    if 'dc:creator' in data and data['dc:creator'] is not None:
+        comment.content = data['dc:creator'] + ' et al.: '
+    
+    if 'prism:doi' in data and data['prism:doi'] is not None:
+        comment.content +='<a href="http://dx.doi.org/' + data['prism:doi'] + '">' + data['dc:title'] + '</a>'
+    else:
+        comment.content +='<b>' + data['dc:title'] + '</b>'
+
+    comment.content += ' in ' + data['prism:publicationName']
+    if 'prism:volume' in data and data['prism:volume'] is not None:
+        comment.content += ', ' + data['prism:volume']
+    comment.content += ' (' + str(year).encode('utf-8') + ')'
+    if 'prism:pageRange' in data and data['prism:pageRange'] is not None:
+        comment.content += ' ' + data['prism:pageRange']
+    if 'article-number' in data and data['article-number'] is not None:
+        comment.content += ' ' + data['article-number']
+    comment.content += '.'
+
+    # Enable comments
+    post = WordPressPost()
+    postorig = wp.call(GetPost(wpid))
+
+    post.id = wpid
+    post.date = postorig.date
+    post.title = postorig.title
+    post.content = postorig.content
+
+    post.comment_status = 'open' # temporarily open comments to add the citation
+    wp.call(EditPost(wpid, post))
+
+    comment.id = wp.call(NewComment(wpid, comment))
+
+    # Warning: Date can only be specified in edit command
+    comment.date_created = ts
+    wp.call(EditComment(comment.id, comment))# Update the before created post
+
+    # Close comments for scopus posts
+    post.comment_status = 'closed' # close comments again for scopus posts
+    wp.call(EditPost(wpid, post))# Update the before created post
+
+    return comment.id
+
+
+
+
+# Todo: can this be turned off for scopus???
+# Add to themes function.php:
+#add_filter('comment_flood_filter', '__return_false');
+# wordpress_xmlrpc.exceptions.InvalidCredentialsError: You are posting comments too quickly. Slow down.
+
+
+
+

+ 54 - 0
create_scopus.sql

@@ -0,0 +1,54 @@
+#CREATE DATABASE scopus;
+#USE scopus;
+
+
+#
+# Table structure for table 'publications'
+#
+
+CREATE TABLE IF NOT EXISTS publications (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    scopusid varchar(255) default NULL,
+    eid varchar(255) default NULL,
+    wpid int default NULL,
+    citedbycount int default NULL,
+    citesloaded int default NULL,
+    categories varchar(255) default NULL,
+    doi varchar(255) default NULL,
+    scopusdata text default NULL,
+    title varchar(255) default NULL,
+    abstract text,
+    bibtex text,
+    ts datetime,
+    UNIQUE KEY(scopusid)
+);
+
+
+CREATE TABLE IF NOT EXISTS citations (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    scopusid varchar(255) default NULL,
+    eid varchar(255) default NULL,
+    wpid int default NULL,
+    wpcommentid int default NULL,
+    citedbycount int default NULL,
+    citesloaded int default NULL,
+    categories varchar(255) default NULL,
+    doi varchar(255) default NULL,
+    scopusdata text default NULL,
+    author  varchar(255) default NULL,
+    title varchar(255) default NULL,
+    journal varchar(255) default NULL,
+    volume varchar(255) default NULL,
+    pages varchar(255) default NULL,
+    articleno varchar(255) default NULL,
+    abstract text,
+    bibtex text,
+    ts datetime,
+    UNIQUE KEY(wpid,scopusid)
+);
+
+
+
+# Todo: Save the full details of all publications
+# Use json serialisation !!!
+#

+ 101 - 0
my_scopus.py

@@ -0,0 +1,101 @@
+# Access key and list of group members
+
+MY_API_KEY = "14d431d052c2caf5e9c4b1ab7de7463d"
+
+
+# PDV
+
+ak = "35313939900"
+
+# Merged with ak, Scopus 24.2.17
+#ak2 = "56656673700"
+
+csa = "15076530600"
+
+matthiasVogelgesang = "35303862100"
+
+timoDritschler = "56473578500"
+
+andreiShkarin = "56950893700"
+
+nicholasTanJerome = ""
+
+tillBergmann = "35308595100"
+
+armenBeglarian = "55343303900"
+
+petraRohr = "40561503300"
+
+norbertKunka = "35276889200"
+
+horstDemattio = "6506285395"
+
+
+
+# UFO-IPE
+
+# Warning: This profile mixes two authors with the same name!
+micheleCaselle = "7006767859"
+
+urosStevanovic = "55557712600"
+
+lorenzoRota = "56473442500"
+
+matthiasBalzer = "35519411500"
+
+marcWeber = "56654729000"
+mw2 = "56603987800"
+mw3 = "7404138824"
+
+
+# UFO-IPS
+
+tomyRolo = "56118820400"
+tr2 = "35194644400"
+tr3 = "35277157300"
+
+tomasFarago = "56655045700"
+
+
+alexyErshof = "56441809800"
+
+romanShkarin = "56951331000"
+
+tiloBaumbach = "7003270957"
+
+# ASTOR/ NOVA
+
+thomasVandekamp = "46761453500"
+
+michaelHeethoff = "55979397800"
+
+sebastianSchmelzle = "34768986100"
+
+philipLoesel = ""
+
+
+# Blacklist
+
+ashotChiligarian = "7004126133"
+
+hansBluemer = "7006284555"
+
+matthiasKleifegs = "6602072426"
+
+
+
+
+pdv = [ak, csa, tillBergmann, armenBeglarian, matthiasVogelgesang, petraRohr, timoDritschler, norbertKunka, horstDemattio]
+
+ufo_pdv = [ak, csa, matthiasVogelgesang, timoDritschler, andreiShkarin ]
+
+ufo_eps = [matthiasBalzer, lorenzoRota, micheleCaselle ]
+
+ufo_ips = [tomyRolo, tr2, tr3, tomasFarago]
+
+ufo_apps = [ thomasVandekamp]
+
+ufo = ufo_pdv + ufo_ips + ufo_eps
+
+
+

+ 413 - 0
scopus-get-publications.py

@@ -0,0 +1,413 @@
+# Get new publications
+# Publication and citations retrieval
+# A. Kopmann, 6.2.17 (ak)
+#
+# Scope:
+# Publications are added to wordpress once, as a post or comment.
+# Afterwards scopus will not change or modify anything any more !!!
+# Updates are completely the responsibility of the ufo users
+#
+
+# Todo:
+# - add mail to author button
+# - save full scopus data in the database
+#
+
+# Configuration - Scopus
+
+import datetime
+import requests
+import json
+import os.path
+
+from my_scopus import MY_API_KEY
+from my_scopus import ak, csa, pdv, ufo, ufo_pdv, ufo_ips, ufo_eps, ufo_apps
+from ak_scopus import get_scopus_list, get_scopus_data, get_scopus_refs
+
+
+from ak_wordpress import wordpress_post_by_scopus, wordpress_comment_by_scopus
+
+
+# Mysql persistent data (Account: scopus, $scopus$)
+import pymysql.cursors
+import pymysql
+
+db_host = 'localhost'
+db_user = 'scopus'
+db_pw = '$scopus$'
+db_name = 'scopus'
+
+log_file = '/Users/kopmann/scopus-publications.log'
+
+
+# Summary
+npubs = 0
+nnewpubs= 0
+ncites = 0
+nnewcites = 0
+
+
+
+def update_publications(authids,authname='',scopus_opts = '',max=0):
+
+
+    print "=== Update of publications for the author group: " + authname
+    #print str(authids)
+
+
+    # Connect to the database
+    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
+
+    # Request all publications of a list of authors (in one query)
+    # Result: list of records with (scopusid, eid, citedbycount)
+    # The citations could also be used later by wordpress (maybe via a plugin)
+
+    publist = get_scopus_list(authids,scopus_opts,max)
+    #publist = get_scopus_list(authids, scopus_opts, 3)
+    #publist = get_scopus_list(authids, '(PUBYEAR AFT 2014)')
+    print "Total number of publications: %d" % len(publist)
+    #print publist
+
+
+    # Save all publication to the publication database
+    try:
+        with connection.cursor() as cursor:
+            for pub in publist:
+                # 1 / Create a new record
+                #print pub # Todo: strip the prefix SCOPUS_ID?!
+                sql = "INSERT IGNORE INTO `publications` (`scopusid`,`eid`) VALUES (%s,%s)"
+                cursor.execute(sql, (pub[0],pub[1]))
+                    
+                sql = "UPDATE `publications` SET `citedbycount` = %s WHERE `scopusid` = %s"
+                cursor.execute(sql, (pub[2],pub[0]))
+
+                # 2 / Add categories
+                if len(authname) > 0:
+                    catlist = []
+                    cat = None # categories stored so far (JSON string or NULL)
+                    sql = "SELECT categories FROM publications WHERE scopusid = %s"
+                    cursor.execute(sql, (pub[0]))
+                    result = cursor.fetchall()
+                    if len(result) > 0:
+                        #print "Categories %s" % result[0]['categories']
+                        cat = result[0]['categories']
+
+                    try:
+                        catlist = json.loads(cat)
+                    except TypeError:
+                        print("No categories up to now")
+
+                    if authname not in catlist:
+                        catlist += [authname]
+                
+                    sql = "UPDATE `publications` SET `categories` = %s WHERE `scopusid` = %s"
+                    cursor.execute(sql, (json.dumps(catlist),pub[0]))
+                        
+
+            # connection is not autocommit by default. So you must commit to save
+            # your changes.
+            connection.commit()
+
+    finally:
+        connection.close()
+
+
+# Read all citations and store in the citation table
+def update_citations():
+    global npubs
+    
+    print ""
+    print "=== Update citatation of all publication in the database"
+    
+    # Connect to the database
+    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
+
+    # Loop over the publications and read all citations from scopus
+
+    # Todo: Shift to a separate script !?
+    try:
+        with connection.cursor() as cursor:
+            # Read a single record
+            sql = "SELECT wpid,eid,citedbycount,citesloaded FROM publications"
+            cursor.execute(sql)
+            result = cursor.fetchall()
+          
+            print "Total number of publications is %d" % len(result)
+            npubs = len(result)
+            #print "Npubs = %d" % npubs
+
+            for pub in result:
+                if pub['wpid'] is None:
+                    continue # no wordpress post created for this publication yet
+                wpid = int(pub['wpid'])
+                if pub['citedbycount'] is None:
+                    citedbycount = 0
+                else:
+                    citedbycount = int(pub['citedbycount'])
+                if pub['citesloaded'] is None:
+                    citesloaded = 0
+                else:
+                    citesloaded = int(pub['citesloaded'])
+
+                # read list of citations
+                if pub['eid'] and (citedbycount > citesloaded):
+                    
+                    print "Processing %d = %s previously cited by %d"  % (wpid, pub['eid'], citesloaded)
+
+                    data = get_scopus_refs(pub['eid'])
+                    #print json.dumps(data,sort_keys=True,indent=4, separators=(',', ': '))
+                    
+                    n = len(data)
+                    #print "Number of citations loaded for processing %d" % n
+                    #print data
+                    
+                    if n > 0:
+                        for cite in data:
+                            #print cite['eid'] + '  ' + cite['dc:title']
+
+                            pubstr = None
+                            try:
+                                pubstr = json.dumps(cite)
+                            except TypeError:
+                                print("Error serializing citation entry")
+
+                            # save all citations to the database
+                            # write complete scopus data of the article !?
+                            sql = "INSERT IGNORE INTO `citations` (`wpid`,`scopusid`,`eid`,`scopusdata`) VALUES (%s,%s,%s,%s)"
+                            cursor.execute(sql, (wpid,cite['dc:identifier'],cite['eid'],pubstr))
+                            connection.commit()
+
+                        # Update the number of cites for this article
+                        if n > citesloaded:
+                            print "New citations found %d -> %d" %(citesloaded,n)
+                            sql = "UPDATE `publications` SET `citesloaded`=" + str(n) + " WHERE wpid=" + str(wpid)
+                            #print sql
+                            cursor.execute(sql)
+                            connection.commit()
+
+
+    finally:
+        connection.close()
+
+
+
+
+# Create wordpress posts for all entries that have none
+def update_wp_posts():
+    global nnewpubs
+
+    print ""
+    print "=== Create posts for newly registered publication in scopus"
+
+    # Connect to the database
+    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
+        
+
+    # Todo: Shift to a separate script !?
+    try:
+        with connection.cursor() as cursor:
+            # Count all publications
+            #sql = "SELECT COUNT(id) FROM publications"
+            #cursor.execute(sql)
+            #result = cursor.fetchall()
+            #if len(result) > 0:
+            #print result[0]['COUNT(id)']
+
+            # Read a single record
+            sql = "SELECT scopusid,categories FROM publications WHERE wpid IS NULL"
+            cursor.execute(sql)
+            result = cursor.fetchall()
+            if len(result) > 0:
+                print "Number of new publications is %d" % len(result)
+                nnewpubs = len(result)
+            else:
+                print "Nothing new found"
+
+
+        # Retrieve all information required for the wordpress page
+        for pub in result:
+            print "Processing " + pub['scopusid'] + " categories " + pub['categories']
+
+            data = get_scopus_data(pub['scopusid'])
+            #print json.dumps(data, sort_keys=True, indent=4, separators=(',', ': '))
+
+            # Parse categories
+            catlist = []
+            try:
+                catlist = json.loads(pub['categories'])
+            except TypeError:
+                print("No categories specified")
+
+            wpid = wordpress_post_by_scopus(data, catlist)
+            
+            #print wpid
+            #print pub['scopusid']
+
+            # Warning: the resulting string uses double quotes (") so use
+            # single quotes (') for the sql command
+            datastr = json.dumps(data)
+            #print datastr
+
+            # Update publication database !!!
+            with connection.cursor() as cursor:
+                # Read a single record
+                #sql = "UPDATE publications SET wpid=" + str(wpid) + ",scopusdata='" + datastr + "' WHERE scopusid = '" + pub['scopusid'] + "'"
+                sql = "UPDATE publications SET wpid=" + str(wpid) + " WHERE scopusid = '" + pub['scopusid'] + "'"
+                cursor.execute(sql)
+                connection.commit()
+
+    finally:
+        connection.close()
+
+
+def update_wp_comments():
+    global ncites
+    global nnewcites
+    
+    print ""
+    print "=== Create comments for newly registered citations in scopus"
+    
+    
+    # Connect to the database
+    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
+    
+    
+    # Todo: Shift to a separate script !?
+    try:
+        with connection.cursor() as cursor:
+            # Count all citations
+            sql = "SELECT COUNT(id) FROM citations"
+            cursor.execute(sql)
+            result = cursor.fetchall()
+            if len(result) > 0:
+                ncites = result[0]['COUNT(id)']
+                #print result[0]['COUNT(id)']
+
+            # Read a single record
+            sql = "SELECT id,wpid,scopusdata FROM citations WHERE wpcommentid IS NULL"
+            cursor.execute(sql)
+            result = cursor.fetchall()
+            print "Number of new citations is %d" % len(result)
+            nnewcites = len(result)
+
+            for pub in result:
+                wpid = int(pub['wpid'])
+                print "Processing post " + str(wpid)
+            
+                data = []
+                try:
+                    data = json.loads(pub['scopusdata'])
+                except TypeError:
+                    print("Scopus data missing?!")
+
+                # If the creation of the comment fails, the wpcommentid 0 is
+                # written to the database. This means, there is no second try
+                # to get this citations added.
+                # All failed comments can be found by searching for wpcommentid = 0
+                #
+                wpcommentid = 0
+                try:
+                    wpcommentid = wordpress_comment_by_scopus(wpid, data)
+                except Exception:
+                    print "Error: Submission of comment failed"
+
+                sql = "UPDATE citations SET wpcommentid=" + str(wpcommentid) + " WHERE id = '" + str(pub['id']) + "'"
+                cursor.execute(sql)
+                connection.commit()
+
+    finally:
+        connection.close()
+
+
+# Todo: Add a script to save the data for all publications in the database!!!
+# There was some problem before?!
+#
+
+# Todo: Add scripts to check consistence in the database
+# and fix problems if detected
+# E.g. search for wpcommentid == 0
+# Check if wp posts + comments are still available, display
+# deleted entries
+#
+
+
+
+# Main
+
+start = datetime.datetime.now()
+
+print ""
+print "***********************************************"
+print "**** scopus-get-publications / " + start.strftime("%Y-%m-%d") + " *****"
+print "***********************************************"
+print ""
+
+
+
+# Update publication database; search for new publications
+# Loop over all author groups defined in my_scopus.py
+
+# Todo: Detect, if there is no access to scopus !!!
+#
+
+# Define the authors that should be considered
+#authors = ["Computing", ufo_pdv]
+#print authors
+
+#update_publications(ufo_pdv, "Computing", '(PUBYEAR AFT 2006)',10)
+update_publications(ufo_pdv, "Computing", '(PUBYEAR AFT 2006)')
+update_publications(ufo_ips, "X-ray Imaging", '(PUBYEAR AFT 2010)')
+update_publications(ufo_eps, "Electronics", '(PUBYEAR AFT 2010)')
+update_publications(ufo_apps, "Morphology", '(PUBYEAR AFT 2010)')
+
+
+update_wp_posts()
+
+# read all citations
+# Todo: read only new citations?!
+
+
+update_citations()
+
+
+# loop over all cites and post comments to wordpress, when necessary
+# update database
+
+update_wp_comments()
+# Todo: deactivate comments for scopus posts!!!
+
+
+# Display summary
+end = datetime.datetime.now()
+print ""
+print "Summary: (see also logfile %s) " % log_file
+print "Date       = " + str(start)
+print "NPubs      = " + str(npubs)
+print "NNewPubs   = " + str(nnewpubs)
+print "NCites     = " + str(ncites)
+print "NNewCites  = " + str(nnewcites)
+print "Runtime    = " + str(end - start)
+
+
+# Write summary to log file
+if not os.path.isfile(log_file):
+    print "Create logfile " + log_file
+    # Open file and write header
+    log = open(log_file,"w")
+    log.write(__file__ + "\n")
+    log.write("\n")
+    log.write("      Date\t    Time\tNPubs\tNNewP\tNCite\tNNewC\t          TRun\n")
+    log.write("------------------------------------------------------------------------------\n")
+    log.close()
+
+
+log = open(log_file,"a")
+log.write("%s\t%s\t%5d\t%5d\t%5d\t%5d\t%s\n" % (start.strftime("%Y-%m-%d"),
+                                    start.strftime("%H:%M:%S"),
+                                    npubs,nnewpubs,ncites,nnewcites,str(end-start)))
+log.close()
+
+
+
+# done
+
+
+
+
+

+ 6 - 0
update.sh

@@ -0,0 +1,6 @@
+# Update publications on UFO webpage by new data in Scopus
+# A Kopmann, 11.4.2017
+#
+
+python -W ignore /root/scopus/scopus-get-publications.py
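+
+# Example cron entry to run the update every morning
+# (an assumption - adjust schedule and paths as needed):
+#   0 6 * * * /root/scopus/update.sh >> /root/scopus/update.log 2>&1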
+