
Initial version

Andreas Kopmann, 7 years ago
commit e58e305e1d
7 changed files with 1398 additions and 0 deletions
  1. README.txt  +368 -0
  2. ak_scopus.py  +265 -0
  3. ak_wordpress.py  +191 -0
  4. create_scopus.sql  +54 -0
  5. my_scopus.py  +101 -0
  6. scopus-get-publications.py  +413 -0
  7. update.sh  +6 -0

+ 368 - 0
README.txt

@@ -0,0 +1,368 @@
+README scopus
+Ak, 27.3.2017
+
+Get information from Scopus database.
+
+These queries work only with access to Scopus (e.g. from the KIT LAN);
+the Scopus service is not publicly available.
+
+
+Content
+info		Documentation, website, etc
+
+readme.txt	This file
+my_scopus.py	List of scopus author ids
+ak_scopus.py	Functions to access scopus
+ak_wordpress.py Functions to create WordPress posts + comments
+scopus-get-publications.py Script to query Scopus
+
+test-scopus.py	Application with some functions to get publication entries
+		Prints a list with some formatting
+test-scopus2.py Example from one of the websites; only one query
+
+
+Usage: 
+1. Go to Scopus and retrieve the Scopus author ids for the scientists in your
+group. Define the ids in my_scopus.py and group them, for example:
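+
+(Shortened excerpt from my_scopus.py - one variable per author id,
+grouped into lists:)
+
+ak = "35313939900"
+csa = "15076530600"
+matthiasVogelgesang = "35303862100"
+
+ufo_pdv = [ak, csa, matthiasVogelgesang]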
+
+2. Select one or more author groups in scopus-get-publications.py (main part at
+the end of the file). Check the definitions of the database and the wordpress
+installation.
+
+3. Execute scopus-get-publications.py.
+python -W ignore scopus-get-publications.py
+
+Note: The -W ignore flag might be necessary if the INSERT IGNORE causes warnings.
+
+Example run:
+ufo:~/scopus # python -W ignore scopus-get-publications.py 
+
+***********************************************
+**** scopus-get-publications / 2017-03-27 *****
+***********************************************
+
+=== Update of publications for the author group: Computing
+Total number of publications: 54
+=== Update of publications for the author group: X-ray Imaging
+Total number of publications: 39
+=== Update of publications for the author group: Electronics
+Total number of publications: 132
+=== Update of publications for the author group: Morphology
+Total number of publications: 21
+
+=== Create posts for newly registered publications in scopus
+Nothing new found
+
+=== Update citations of all publications in the database
+Total number of publications is 281
+
+=== Create comments for newly registered citations in scopus
+Number of new citations is 0
+
+Summary: (see also logfile /root/scopus/scopus-publications.log) 
+Date       = 2017-03-27 21:28:36.002624
+NPubs      = 281
+NNewPubs   = 0
+NCites     = 4699
+NNewCites  = 0
+Runtime    = 0:00:11.496362
+
+
+Further enhancements
+
+Todo:
+- Reprocessing of all posts, if the format has changed,
+e.g. a button with an email link to the author has been added
+or a new category has been added
+- Query only the latest citations for each publication, not all of them
+- Store the JSON data of all publications
+- Get bibliographic information for display on the web page of a research group
+like UFO, or maybe also later for the DTS program
+- Handle wrong publications in scopus for authors with the same name
+- Automatically include reports and student theses by
+bibtex definition and upload on a server!?
+-> Would have the nice effect that all student work is organized systematically!!!
+
+
+
+Structure of the database
+
+Both tables keep the references to the publications in Scopus as well as the
+WordPress ids. With this information, reprocessing is possible (but not
+implemented yet); see the sketch below.
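+
+A minimal sketch of such a reprocessing lookup, assuming the pymysql setup
+from scopus-get-publications.py (the helper itself is hypothetical, not part
+of this repository):
+
+import pymysql.cursors
+
+def get_wordpress_id(scopusid):
+    # look up the wordpress post id for a scopus publication
+    connection = pymysql.connect(host='localhost', user='scopus',
+            password='$scopus$', db='scopus', charset='utf8mb4',
+            cursorclass=pymysql.cursors.DictCursor)
+    try:
+        with connection.cursor() as cursor:
+            sql = "SELECT wpid FROM publications WHERE scopusid = %s"
+            cursor.execute(sql, (scopusid,))
+            result = cursor.fetchall()
+    finally:
+        connection.close()
+    if len(result) > 0:
+        return result[0]['wpid']
+    return None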
+
+
+Table publications:
+
+MariaDB [scopus]> describe publications;
++--------------+--------------+------+-----+---------+----------------+
+| Field        | Type         | Null | Key | Default | Extra          |
++--------------+--------------+------+-----+---------+----------------+
+| id           | int(11)      | NO   | PRI | NULL    | auto_increment |
+| scopusid     | varchar(255) | YES  | UNI | NULL    |                |
+| wpid         | int(11)      | YES  |     | NULL    |                |
+| citedbycount | int(11)      | YES  |     | NULL    |                |
+| citesloaded  | int(11)      | YES  |     | NULL    |                |
+| categories   | varchar(255) | YES  |     | NULL    |                |
+| doi          | varchar(255) | YES  |     | NULL    |                |
+| title        | varchar(255) | YES  |     | NULL    |                |
+| abstract     | text         | YES  |     | NULL    |                |
+| bibtex       | text         | YES  |     | NULL    |                |
+| ts           | datetime     | YES  |     | NULL    |                |
+| scopusdata   | text         | YES  |     | NULL    |                |
+| eid          | varchar(255) | YES  |     | NULL    |                |
++--------------+--------------+------+-----+---------+----------------+
+
+Table citations:
+
+MariaDB [scopus]> describe citations;
++--------------+--------------+------+-----+---------+----------------+
+| Field        | Type         | Null | Key | Default | Extra          |
++--------------+--------------+------+-----+---------+----------------+
+| id           | int(11)      | NO   | PRI | NULL    | auto_increment |
+| scopusid     | varchar(255) | YES  |     | NULL    |                |
+| eid          | varchar(255) | YES  |     | NULL    |                |
+| wpid         | int(11)      | YES  | MUL | NULL    |                |
+| wpcommentid  | int(11)      | YES  |     | NULL    |                |
+| citedbycount | int(11)      | YES  |     | NULL    |                |
+| citesloaded  | int(11)      | YES  |     | NULL    |                |
+| categories   | varchar(255) | YES  |     | NULL    |                |
+| doi          | varchar(255) | YES  |     | NULL    |                |
+| scopusdata   | text         | YES  |     | NULL    |                |
+| title        | varchar(255) | YES  |     | NULL    |                |
+| abstract     | text         | YES  |     | NULL    |                |
+| bibtex       | text         | YES  |     | NULL    |                |
+| ts           | datetime     | YES  |     | NULL    |                |
++--------------+--------------+------+-----+---------+----------------+
+
+
+
+Setup of the scopus database in mysql
+
+create database scopus;
+
+CREATE USER 'scopus'@'localhost' IDENTIFIED BY '$scopus$';
+GRANT ALL ON scopus.* TO 'scopus'@'localhost';
+
+# create tables
+mysql -u scopus -p scopus < create_scopus.sql
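+
+A quick way to verify the database access from Python (assuming pymysql is
+installed as described below):
+
+import pymysql
+
+connection = pymysql.connect(host='localhost', user='scopus',
+        password='$scopus$', db='scopus')
+try:
+    with connection.cursor() as cursor:
+        cursor.execute("SHOW TABLES")
+        print cursor.fetchall()
+finally:
+    connection.close()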
+
+
+
+Publications in Scopus:
+
+Sometimes (unfortunately quite often) an author id in Scopus is not unique but
+identifies several researchers with the same name, e.g. Michele Caselle (3 persons)
+or Matthias Balzer (2).
+
+This case is currently handled manually by deleting all publications from the
+unknown authors. It might also be possible to implement a blacklist?? See the
+sketch below.
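+
+A possible blacklist filter (only a sketch - the publication id below is a
+placeholder, and such a filter is not implemented in this repository):
+
+# hypothetical blacklist of publication scopus ids that belong to
+# the foreign authors with the same name
+PUB_BLACKLIST = ["SCOPUS_ID:0000000000"]
+
+def filter_blacklisted(publist):
+    # publist entries are [scopusid, eid, citedbycount],
+    # as returned by get_scopus_list()
+    return [pub for pub in publist if pub[0] not in PUB_BLACKLIST]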
+
+
+Sample data from Scopus:
+{
+    "abstracts-retrieval-response": {
+        "authors": {
+            "author": [
+                {
+                    "@_fa": "true",
+                    "@auid": "15076530600",
+                    "@seq": "1",
+                    "affiliation": {
+                        "@href": "http://api.elsevier.com/content/affiliation/affiliation_id/60102538",
+                        "@id": "60102538"
+                    },
+                    "author-url": "http://api.elsevier.com/content/author/author_id/15076530600",
+                    "ce:given-name": "Suren",
+                    "ce:indexed-name": "Chilingaryan S.",
+                    "ce:initials": "S.",
+                    "ce:surname": "Chilingaryan",
+                    "preferred-name": {
+                        "ce:given-name": "Suren",
+                        "ce:indexed-name": "Chilingaryan S.",
+                        "ce:initials": "S.",
+                        "ce:surname": "Chilingaryan"
+                    }
+                },
+                {
+                    "@_fa": "true",
+                    "@auid": "35313939900",
+                    "@seq": "2",
+                    "affiliation": {
+                        "@href": "http://api.elsevier.com/content/affiliation/affiliation_id/60102538",
+                        "@id": "60102538"
+                    },
+                    "author-url": "http://api.elsevier.com/content/author/author_id/35313939900",
+                    "ce:given-name": "Andreas",
+                    "ce:indexed-name": "Kopmann A.",
+                    "ce:initials": "A.",
+                    "ce:surname": "Kopmann",
+                    "preferred-name": {
+                        "ce:given-name": "Andreas",
+                        "ce:indexed-name": "Kopmann A.",
+                        "ce:initials": "A.",
+                        "ce:surname": "Kopmann"
+                    }
+                },
+                {
+                    "@_fa": "true",
+                    "@auid": "56001075000",
+                    "@seq": "3",
+                    "affiliation": {
+                        "@href": "http://api.elsevier.com/content/affiliation/affiliation_id/60032633",
+                        "@id": "60032633"
+                    },
+                    "author-url": "http://api.elsevier.com/content/author/author_id/56001075000",
+                    "ce:given-name": "Alessandro",
+                    "ce:indexed-name": "Mirone A.",
+                    "ce:initials": "A.",
+                    "ce:surname": "Mirone",
+                    "preferred-name": {
+                        "ce:given-name": "Alessandro",
+                        "ce:indexed-name": "Mirone A.",
+                        "ce:initials": "A.",
+                        "ce:surname": "Mirone"
+                    }
+                },
+                {
+                    "@_fa": "true",
+                    "@auid": "35277157300",
+                    "@seq": "4",
+                    "affiliation": {
+                        "@href": "http://api.elsevier.com/content/affiliation/affiliation_id/60102538",
+                        "@id": "60102538"
+                    },
+                    "author-url": "http://api.elsevier.com/content/author/author_id/35277157300",
+                    "ce:given-name": "Tomy",
+                    "ce:indexed-name": "Dos Santos Rolo T.",
+                    "ce:initials": "T.",
+                    "ce:surname": "Dos Santos Rolo",
+                    "preferred-name": {
+                        "ce:given-name": "Tomy",
+                        "ce:indexed-name": "Dos Santos Rolo T.",
+                        "ce:initials": "T.",
+                        "ce:surname": "Dos Santos Rolo"
+                    }
+                },
+                {
+                    "@_fa": "true",
+                    "@auid": "35303862100",
+                    "@seq": "5",
+                    "affiliation": {
+                        "@href": "http://api.elsevier.com/content/affiliation/affiliation_id/60102538",
+                        "@id": "60102538"
+                    },
+                    "author-url": "http://api.elsevier.com/content/author/author_id/35303862100",
+                    "ce:given-name": "Matthias",
+                    "ce:indexed-name": "Vogelgesang M.",
+                    "ce:initials": "M.",
+                    "ce:surname": "Vogelgesang",
+                    "preferred-name": {
+                        "ce:given-name": "Matthias",
+                        "ce:indexed-name": "Vogelgesang M.",
+                        "ce:initials": "M.",
+                        "ce:surname": "Vogelgesang"
+                    }
+                }
+            ]
+        },
+        "coredata": {
+            "citedby-count": "0",
+            "dc:description": "X-ray tomography has been proven to be a valuable tool for understanding internal, otherwise invisible, mechanisms in biology and other fields. Recent advances in digital detector technology enabled investigation of dynamic processes in 3D with a temporal resolution down to the milliseconds range. Unfortunately it requires computationally intensive recon- struction algorithms with long post-processing times. We have optimized the reconstruction software employed at the micro-tomography beamlines at KIT and ESRF. Using a 4 stage pipelined architecture and the computational power of modern graphic cards, we were able to reduce the processing time by a factor 75 with a single server. The time required to reconstruct a typical 3D image is reduced down to several seconds only and online visualization is possible for the first time.Copyright is held by the author/owner(s).",
+            "dc:identifier": "SCOPUS_ID:84859045029",
+            "dc:title": "Poster: A GPU-based architecture for real-time data assessment at synchrotron experiments",
+            "link": [
+                {
+                    "@_fa": "true",
+                    "@href": "http://api.elsevier.com/content/abstract/scopus_id/84859045029",
+                    "@rel": "self"
+                }
+            ],
+            "prism:aggregationType": "Conference Proceeding",
+            "prism:coverDate": "2011-12-01",
+            "prism:doi": "10.1145/2148600.2148624",
+            "prism:pageRange": "51-52",
+            "prism:publicationName": "SC'11 - Proceedings of the 2011 High Performance Computing Networking, Storage and Analysis Companion, Co-located with SC'11",
+            "prism:url": "http://api.elsevier.com/content/abstract/scopus_id/84859045029"
+        }
+    }
+}
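+
+The relevant fields can be picked from this structure as in the following
+sketch (data is assumed to hold the parsed JSON shown above):
+
+authors = data['abstracts-retrieval-response']['authors']['author']
+coredata = data['abstracts-retrieval-response']['coredata']
+
+names = ', '.join([au['ce:indexed-name'] for au in authors])
+print names + ': ' + coredata['dc:title']
+print 'doi:' + coredata['prism:doi'] + ' (cited ' + coredata['citedby-count'] + ' times)'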
+
+
+
+Installation of python, mysql et al:
+
+pip install python-wordpress-xmlrpc
+
+Web server configuration (apparently has to be redone after every installation!!!)
+
+/etc/apache2/httpd.conf:
+LoadModule userdir_module libexec/apache2/mod_userdir.so
+LoadModule php5_module libexec/apache2/libphp5.so
+Include /private/etc/apache2/extra/httpd-userdir.conf
+
+/etc/apache2/extra/httpd-userdir.conf:
+Include /private/etc/apache2/users/*.conf
+
+/etc/php.ini:
+pdo_mysql.default_socket= /tmp/mysql.sock
+mysql.default_socket = /tmp/mysql.sock
+mysqli.default_socket = /tmp/mysql.sock
+
+
+sh-3.2# apachectl restart
+
+
+Install website:
+
+Create an archive with the WP Duplicator plugin
+
+Save scopus database
+
+mysqldump -u scopus -p scopus > scopus-170322.sql
+
+
+Create database on remote system
+
+mysql:
+CREATE USER 'scopus'@'localhost' IDENTIFIED BY '$scopus$';
+GRANT ALL PRIVILEGES ON scopus.* TO 'scopus'@'localhost';
+
+CREATE DATABASE scopus;
+
+mysql -u scopus -p scopus < scopus-170322.sql
+
+
+Create database wp_ufo2:
+
+CREATE USER 'ufo'@'localhost' IDENTIFIED BY '$ipepdv$';
+GRANT ALL PRIVILEGES ON wp_ufo2.* TO 'ufo'@'localhost';
+
+CREATE DATABASE wp_ufo2;
+
+
+Import WP archive:
+mkdir ufo2
+chown -R wwwrun:www ufo2
+
+http://ufo.kit.edu/ufo2/installer.php
+
+
+Error: PHP module ZipArchive is missing 
+Manual extraction is available in the advanced options !!!
+
+
+Installation Scopus-Scripts:
+
+pip install requests
+pip install python-wordpress-xmlrpc
+pip install pymysql
+
+Check configurations:
+scopus-get-publications.py
+ak_wordpress.py
+
+
+
+
+
+

+ 265 - 0
ak_scopus.py

@@ -0,0 +1,265 @@
+import requests
+import json
+from my_scopus import MY_API_KEY
+
+
+#
+# Get the last N publications of a given author list
+#
+# Arguments: list of scopus author ids (e.g. "35313939900")
+# Returns: list of scopus ids, electronic id and number of citations
+# DOI would be desirable but is not available in some records
+# e.g. ('SCOPUS_ID:0037368024',)
+# The result can be used in the display functions like get_scopus_brief()
+#
+def get_scopus_list(author_list, opt='', max=0):
+    
+    count = 25 # number of publications requested in one call
+
+
+    if isinstance(author_list, list):
+        #print "Length of author list %d" % len(author_list)
+        query = ' OR '.join(['AU-ID('+au+')' for au in author_list])
+    else:
+        query = 'AU-ID('+author_list+')'
+
+    if len(opt) > 0:
+        query = query + " AND " + opt
+    
+# The scopus query limits the number of results per request.
+# To get all results, a loop over the result pages is required.
+# The loop is controlled by count and start.
+
+# The results can be limited by time via the opt argument,
+# e.g. PUBYEAR AFT 2010
+
+    if (max > 0) and (max < count):
+        npubstoget = max
+        count = max
+    else:
+        npubstoget = count
+
+    n = 0
+    start = 0
+    ntotal = 0
+    publist = []
+    while (npubstoget > 0):
+        
+        loopargs = "&count=%d&start=%d" % (count, start)
+        #print loopargs
+
+        #print "Query: " + query
+        url = ("http://api.elsevier.com/content/search/scopus?query="
+               +query+ "&field=dc:identifier,citedby-count,eid" + loopargs)
+        #print "URL: " + url
+        resp = requests.get(url,
+                headers={'Accept':'application/json',
+                        'X-ELS-APIKey': MY_API_KEY})
+        #print resp
+        results = resp.json()
+
+
+        if (n==0):
+            n = int(results['search-results']['opensearch:totalResults'])
+            #print "Number publications found = %d" % n
+            if (max == 0):
+                npubstoget = n
+
+        #print json.dumps(resp.json(),
+        #             sort_keys=True,
+        #             indent=4, separators=(',', ': '))
+
+        newpubs = []
+        for r in results['search-results']["entry"]:
+            #print r
+            try:
+                newpubs += [[ str(r['dc:identifier']),str(r['eid']),str(r['citedby-count'])]]
+            except KeyError:
+                print "Warning: There is data missing"
+                print r
+
+# Todo : DOI is not always available !!!
+
+
+        #newpubs = [[str(r['dc:identifier']),str(r['citedby-count'])] for r in results['search-results']["entry"]]
+        #citations = [[str(r['citedby-count'])] for r in results['search-results']["entry"]]
+
+        publist += newpubs
+
+# Todo: Counting by the list is dangerous - if an element is missing !!!
+
+        nreceived = len(newpubs)
+        nlist = len(publist)
+        #print "Received: %d" %nreceived
+        #print "In list= %d" %nlist
+
+        # Next iteration
+        #print "Calculating loop parameters npubstoget = %d (max = %d)" % (npubstoget, max)
+        npubstoget = npubstoget - count
+        start += count
+
+    return publist
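+
+# Usage sketch (author groups as defined in my_scopus.py; this mirrors
+# the calls in scopus-get-publications.py):
+#   from my_scopus import ufo_pdv
+#   publist = get_scopus_list(ufo_pdv, '(PUBYEAR AFT 2006)')
+#   for scopusid, eid, citedby in publist:
+#       print scopusid, eid, citedby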
+
+
+def get_scopus_data(SCOPUS_ID):
+    url = ("http://api.elsevier.com/content/abstract/scopus_id/"
+           + SCOPUS_ID
+           + "?field=article-number,title,publicationName,volume,issueIdentifier,"
+           + "prism:pageRange,coverDate,article-number,eid,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn")
+    #print url
+    resp = requests.get(url,
+            headers={'Accept':'application/json',
+            'X-ELS-APIKey': MY_API_KEY})
+            
+    #results = json.loads(resp.text.encode('utf-8'))
+    results = resp.json()
+    return results
+
+
+
+def get_scopus_refs(EID):
+    # Loop over the result pages, in case there are more than 25 citations
+    #
+
+
+    count = 25
+    n = 0
+    npubstoget = 25
+    start = 0
+    ntotal = 0
+    publist = []
+
+    while (npubstoget > 0):
+    
+        loopargs = "&count=%d&start=%d" % (count, start)
+        #print loopargs
+
+        url = ("https://api.elsevier.com/content/search/scopus?query=refeid("
+               + EID + ")" + loopargs)
+
+        #print "URL: " + url
+        resp = requests.get(url,headers={'Accept':'application/json','X-ELS-APIKey':MY_API_KEY})
+
+        results = resp.json()
+        #print json.dumps(results,sort_keys=True,indent=4, separators=(',', ': '))
+
+        if (n==0):
+            n = int(results['search-results']['opensearch:totalResults'])
+            #print "Current number citations in scopus = %d" % n
+            npubstoget = n
+
+        if (n>0):
+                publist += results['search-results']['entry']
+
+        npubstoget = npubstoget - count
+        start += count
+
+
+    return publist
+
+
+
+def get_scopus_info(SCOPUS_ID):
+    url = ("http://api.elsevier.com/content/abstract/scopus_id/"
+           + SCOPUS_ID
+           + "?field=article-number,title,publicationName,volume,issueIdentifier,"
+           + "prism:pageRange,coverDate,article-number,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn")
+    #print url
+    resp = requests.get(url,
+                headers={'Accept':'application/json',
+                        'X-ELS-APIKey': MY_API_KEY})
+    results = json.loads(resp.text.encode('utf-8'))
+    #print resp
+    #print results
+    
+    fstring = '{authors}, {title}, {journal}, {volume}, {articlenum}, ({date}). {doi} (cited {cites} times).\n{abstract}\n\n'
+
+    return fstring.format(authors=', '.join([au['ce:indexed-name'] for au in results['abstracts-retrieval-response']['authors']['author']]),
+                                title=results['abstracts-retrieval-response']['coredata']['dc:title'].encode('utf-8'),
+                                 journal=results['abstracts-retrieval-response']['coredata']['prism:publicationName'].encode('utf-8'),
+                                 volume=results['abstracts-retrieval-response']['coredata']['prism:volume'].encode('utf-8'),
+                                 articlenum=(results['abstracts-retrieval-response']['coredata'].get('prism:pageRange') or
+                                             results['abstracts-retrieval-response']['coredata'].get('article-number')).encode('utf-8'),
+                                 date=results['abstracts-retrieval-response']['coredata']['prism:coverDate'].encode('utf-8'),
+                                 doi='doi:' + results['abstracts-retrieval-response']['coredata']['prism:doi'].encode('utf-8'),
+                                 cites=int(results['abstracts-retrieval-response']['coredata']['citedby-count'].encode('utf-8')),
+                                 abstract=results['abstracts-retrieval-response']['coredata']['dc:description'].encode('utf-8'))
+
+
+#
+# Display a list of publications in plain text format
+#
+# Argument: scopus id of the publication
+#
+# Todo: Implement other formats (e.g. html, bibtex)
+#   Format publications as articles, Title, Abstract
+#
+def get_scopus_brief(SCOPUS_ID, max_authors=1000):
+    id = SCOPUS_ID
+    if isinstance(id, list):
+        id = id[0]
+        
+    url = ("http://api.elsevier.com/content/abstract/scopus_id/"
+           + id
+           + "?field=authors,article-number,title,publicationName,volume,issueIdentifier,"
+           + "prism:pageRange,coverDate,article-number,doi,citedby-count,prism:aggregationType,url,identifier,description,prism:issn")
+
+    #print url
+    resp = requests.get(url,
+                headers={'Accept':'application/json',
+                            'X-ELS-APIKey': MY_API_KEY})
+    results = json.loads(resp.text.encode('utf-8'))
+    #print resp
+    #print results
+    
+    coredata = results['abstracts-retrieval-response']['coredata']
+    
+    pub = ''
+    authors = results['abstracts-retrieval-response']['authors']['author']
+    #print "Number of authors: %d" %len(authors)
+
+    if len(authors) > max_authors:
+        return ''
+
+    if len(authors) > 20:
+        pub = pub + authors[0]['ce:indexed-name'] + ' et al.: '
+    else:
+        pub = ', '.join([au['ce:indexed-name'] for au in authors]) + ': '
+
+    try:
+        if coredata.get('dc:title'):
+            pub = pub + coredata.get('dc:title').encode('utf-8')
+    except ValueError:
+        print "!!! Error encoding title of publication !!!"
+        #print coredata.get('dc:title')
+        pub = pub + coredata.get('dc:title')
+
+    if coredata.get('prism:publicationName'):
+        pub = pub + ', ' + coredata.get('prism:publicationName').encode('utf-8')
+
+    if coredata.get('prism:volume'):
+        pub = pub + ', ' + coredata.get('prism:volume').encode('utf-8')
+
+    if coredata.get('prism:issueIdentifier'):
+        pub = pub + ', ' + coredata.get('prism:issueIdentifier').encode('utf-8')
+    
+    if coredata.get('prism:coverDate'):
+        pub = pub + ' (' + coredata.get('prism:coverDate').encode('utf-8') + ') '
+
+    if coredata.get('prism:pageRange'):
+        pub = pub + coredata.get('prism:pageRange').encode('utf-8')
+    elif coredata.get('article-number'):
+        pub = pub + coredata.get('article-number').encode('utf-8')
+
+    if coredata.get('prism:doi'):
+        pub = pub + ', doi:' + coredata.get('prism:doi').encode('utf-8')
+
+    if coredata.get('citedby-count'):
+        pub = pub + ' (cited ' + coredata.get('citedby-count').encode('utf-8') + ' times)'
+
+    pub = pub + '.\n'
+
+
+    return pub
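+
+# Example (a sketch; run manually with a valid scopus id, e.g. the one
+# from the sample record in README.txt):
+#   print get_scopus_brief('SCOPUS_ID:84859045029')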
+
+

+ 191 - 0
ak_wordpress.py

@@ -0,0 +1,191 @@
+# Create posts via wordpress API
+# A. Kopmann 6.2.2017 (ak)
+#
+
+from datetime import datetime
+import json
+
+from wordpress_xmlrpc import Client
+from wordpress_xmlrpc import WordPressPost, WordPressComment
+from wordpress_xmlrpc.methods.posts import GetPost, NewPost, EditPost
+from wordpress_xmlrpc.methods.comments import NewComment, EditComment
+
+
+# Use the WordPress account - not the mysql credentials
+wp = Client('http://localhost/~kopmann/ufo2/xmlrpc.php', 'scopus', '$scopus$')
+
+
+#
+# create a post from a scopus query
+#
+def wordpress_post_by_scopus(data, category = []):
+    
+    coredata = data['abstracts-retrieval-response']['coredata']
+    try:
+        authors = data['abstracts-retrieval-response']['authors']['author']
+    except KeyError:
+        print "Have not found authors in dataset"
+        print " -> Is the connection to scopus broken???"
+        exit()
+
+    # decode date
+    tsstring = coredata['prism:coverDate'].encode('utf-8')
+    ts = datetime.strptime(tsstring, "%Y-%m-%d").timetuple()
+    year = ts.tm_year
+
+    # Display cover date and title
+    print("%s  -- %s" % (tsstring, coredata['dc:title']))
+
+
+    # define post structure
+    post = WordPressPost()
+    post.title = coredata['dc:title'].encode('utf-8')
+    post.date = ts
+
+    # set the name of the post different to the title
+    post.slug = coredata['dc:identifier'].encode('utf-8')
+    
+    post.excerpt = authors[0]['ce:indexed-name'].encode('utf-8')
+    if len(authors) > 2:
+        post.excerpt += " et al."
+    elif len(authors) == 2:
+        post.excerpt += u', ' + authors[1]['ce:indexed-name'].encode('utf-8')
+    post.excerpt += u', in <em>' + coredata['prism:publicationName'].encode('utf-8') + u'</em>'
+    if 'prism:volume' in coredata:
+        post.excerpt += u', ' + coredata['prism:volume'].encode('utf-8')
+    post.excerpt += u' (' + str(year).encode('utf-8') + u')'
+    if 'prism:pageRange' in coredata:
+        post.excerpt += u' ' + coredata['prism:pageRange'].encode('utf-8')
+    if 'article-number' in coredata:
+        post.excerpt += u', ' + coredata['article-number'].encode('utf-8')
+    post.excerpt += u'.'
+
+    post.content = u'<p>' + authors[0]['ce:indexed-name'].encode('utf-8')
+    authors.pop(0)
+    if len(authors) > 20:
+        post.content += " et al."
+    else:
+        for author in authors:
+            post.content += u', ' + author['ce:indexed-name'].encode('utf-8')
+    post.content += u'</p>'
+    post.content += u'<p>in <em>' + coredata['prism:publicationName'].encode('utf-8') + u'</em>'
+    if 'prism:volume' in coredata:
+        post.content += u', ' + coredata['prism:volume'].encode('utf-8')
+    post.content += u' (' + str(year).encode('utf-8') + u')'
+    if 'prism:pageRange' in coredata:
+        post.content += u' ' + coredata['prism:pageRange'].encode('utf-8')
+    if 'article-number' in coredata:
+        post.content += u', ' + coredata['article-number'].encode('utf-8')
+    post.content += u'.'
+    if 'prism:doi' in coredata:
+        post.content += u' DOI:' + coredata['prism:doi'].encode('utf-8')
+    post.content += u'</p>\n\n'
+    if 'dc:description' in coredata:
+        post.content += u'<div class="accordion-inner"><h4>Abstract</h4>' + coredata['dc:description']
+        if 'authkeywords' in coredata:
+            post.content += u'\n<b>Keywords:</b> ' + coredata['authkeywords'].encode('utf-8')
+        post.content += u'</div>'
+    if 'prism:doi' in coredata:
+        link = u'http://dx.doi.org/' + coredata['prism:doi'].encode('utf-8')
+        post.content += u'\n\n<div class="accordion-inner"><a class="btn btn-primary" href="' + link + u'"><i class="icon-download icon-white"></i> Get it</a></div>'
+
+    #print post.content
+
+    post.id = wp.call(NewPost(post)) # Creates a new post and returns the id!
+
+
+    if category == '':
+        catlist = ['Publications']
+    else:
+        catlist = ['Publications'] + category
+    post.terms_names = {
+            'category': catlist # defined in WP + python script
+        }
+
+    # Publish the post (it was created as a draft above)
+    post.post_status = 'publish' # alternative is draft here !
+    post.comment_status = 'closed' # comments are opened only while adding citations
+    wp.call(EditPost(post.id, post))# Update the before created post
+
+    # need to update the database !!!
+    return post.id
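+
+# Usage sketch (data as returned by ak_scopus.get_scopus_data; this mirrors
+# update_wp_posts() in scopus-get-publications.py):
+#   from ak_scopus import get_scopus_data
+#   data = get_scopus_data('SCOPUS_ID:84859045029')
+#   wpid = wordpress_post_by_scopus(data, ['Computing'])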
+
+
+#
+# Create comments for all citations
+#
+
+def wordpress_comment_by_scopus(wpid, data):
+
+    #print "Create Wordpress comment for post %d" % wpid
+
+    #print json.dumps(data,sort_keys=True,indent=4, separators=(',', ': '))
+
+    # decode date
+    tsstring = data['prism:coverDate'].encode('utf-8')
+    ts = datetime.strptime(tsstring, "%Y-%m-%d").timetuple()
+    year = ts.tm_year
+
+    # Display cover date and title
+    print("%s  -- %s" % (tsstring, data['dc:title']))
+
+
+    # Create WP comment
+    # define post structure
+    comment = WordPressComment()
+    comment.id = 0
+    comment.content = ""
+    
+    if 'dc:creator' in data and data['dc:creator'] is not None:
+        comment.content = data['dc:creator'] + ' et al.: '
+    
+    if 'prism:doi' in data and data['prism:doi'] is not None:
+        comment.content +='<a href="http://dx.doi.org/' + data['prism:doi'] + '">' + data['dc:title'] + '</a>'
+    else:
+        comment.content +='<b>' + data['dc:title'] + '</b>'
+
+    comment.content += ' in ' + data['prism:publicationName']
+    if 'prism:volume' in data and data['prism:volume'] is not None:
+        comment.content += ', ' + data['prism:volume']
+    comment.content += ' (' + str(year).encode('utf-8') + ')'
+    if 'prism:pageRange' in data and data['prism:pageRange'] is not None:
+        comment.content += ' ' + data['prism:pageRange']
+    if 'article-number' in data and data['article-number'] is not None:
+        comment.content += ' ' + data['article-number']
+    comment.content += '.'
+
+    # Enable comments
+    post = WordPressPost()
+    postorig = wp.call(GetPost(wpid))
+
+    post.id = wpid
+    post.date = postorig.date
+    post.title = postorig.title
+    post.content = postorig.content
+
+    post.comment_status = 'open' # temporarily open comments to add the citation
+    wp.call(EditPost(wpid, post))
+
+    comment.id = wp.call(NewComment(wpid, comment))
+
+    # Warning: Date can only be specified in edit command
+    comment.date_created = ts
+    wp.call(EditComment(comment.id, comment))# Update the before created post
+
+    # Close comments for scopus posts
+    post.comment_status = 'closed' # close comments again for scopus posts
+    wp.call(EditPost(wpid, post))# Update the before created post
+
+    return comment.id
+
+
+
+
+# Todo: can this be turned off for scopus???
+# Add to themes function.php:
+#add_filter('comment_flood_filter', '__return_false');
+# wordpress_xmlrpc.exceptions.InvalidCredentialsError: You are posting comments too quickly. Slow down.
+
+
+
+

+ 54 - 0
create_scopus.sql

@@ -0,0 +1,54 @@
+#CREATE DATABASE scopus;
+#USE scopus;
+
+
+#
+# Table structure for table 'publications'
+#
+
+CREATE TABLE IF NOT EXISTS publications (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    scopusid varchar(255) default NULL,
+    eid varchar(255) default NULL,
+    wpid int default NULL,
+    citedbycount int default NULL,
+    citesloaded int default NULL,
+    categories varchar(255) default NULL,
+    doi varchar(255) default NULL,
+    scopusdata text default NULL,
+    title varchar(255) default NULL,
+    abstract text,
+    bibtex text,
+    ts datetime,
+    UNIQUE KEY(scopusid)
+);
+
+
+CREATE TABLE IF NOT EXISTS citations (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    scopusid varchar(255) default NULL,
+    eid varchar(255) default NULL,
+    wpid int default NULL,
+    wpcommentid int default NULL,
+    citedbycount int default NULL,
+    citesloaded int default NULL,
+    categories varchar(255) default NULL,
+    doi varchar(255) default NULL,
+    scopusdata text default NULL,
+    author  varchar(255) default NULL,
+    title varchar(255) default NULL,
+    journal varchar(255) default NULL,
+    volume varchar(255) default NULL,
+    pages varchar(255) default NULL,
+    articleno varchar(255) default NULL,
+    abstract text,
+    bibtex text,
+    ts datetime,
+    UNIQUE KEY(wpid,scopusid)
+);
+
+
+
+# Todo: Save the full details of all publications
+# Use json serialisation !!!
+#

+ 101 - 0
my_scopus.py

@@ -0,0 +1,101 @@
+# Access key and list of group members
+
+MY_API_KEY = "14d431d052c2caf5e9c4b1ab7de7463d"
+
+
+# PDV
+
+ak = "35313939900"
+
+# Merged with ak, Scopus 24.2.17
+#ak2 = "56656673700"
+
+csa = "15076530600"
+
+matthiasVogelgesang = "35303862100"
+
+timoDritschler = "56473578500"
+
+andreiShkarin = "56950893700"
+
+nicholasTanJerome = ""
+
+tillBergmann = "35308595100"
+
+armenBeglarian = "55343303900"
+
+petraRohr = "40561503300"
+
+norbertKunka = "35276889200"
+
+horstDemattio = "6506285395"
+
+
+
+# UFO-IPE
+
+# Warning: This profile mixes two authors with the same name!
+micheleCaselle = "7006767859"
+
+urosStevanovic = "55557712600"
+
+lorenzoRota = "56473442500"
+
+matthiasBalzer = "35519411500"
+
+marcWeber = "56654729000"
+mw2 = "56603987800"
+mw3 = "7404138824"
+
+
+# UFO-IPS
+
+tomyRolo = "56118820400"
+tr2 = "35194644400"
+tr3 = "35277157300"
+
+tomasFarago = "56655045700"
+
+
+alexyErshof = "56441809800"
+
+romanShkarin = "56951331000"
+
+tiloBaumbach = "7003270957"
+
+# ASTOR/ NOVA
+
+thomasVandekamp = "46761453500"
+
+michaelHeethoff = "55979397800"
+
+sebastianSchmelzle = "34768986100"
+
+philipLoesel = ""
+
+
+# Blacklist
+
+ashotChiligarian = "7004126133"
+
+hansBluemer = "7006284555"
+
+matthiasKleifegs = "6602072426"
+
+
+
+
+pdv = [ak, csa, tillBergmann, armenBeglarian, matthiasVogelgesang, petraRohr, timoDritschler, norbertKunka, horstDemattio]
+
+ufo_pdv = [ak, csa, matthiasVogelgesang, timoDritschler, andreiShkarin ]
+
+ufo_eps = [matthiasBalzer, lorenzoRota, micheleCaselle ]
+
+ufo_ips = [tomyRolo, tr2, tr3, tomasFarago]
+
+ufo_apps = [ thomasVandekamp]
+
+ufo = ufo_pdv + ufo_ips + ufo_eps
+
+
+

+ 413 - 0
scopus-get-publications.py

@@ -0,0 +1,413 @@
+# Get new publications
+# Publication and citations retrieval
+# A. Kopmann, 6.2.17 (ak)
+#
+# Scope:
+# Publications are added to wordpress once, as a post or comment.
+# Afterwards scopus will not change or modify anything any more !!!
+# Updates are completely the responsibility of the ufo users
+#
+
+# Todo:
+# - add mail to author button
+# - save full scopus data in the database
+#
+
+# Configuration - Scopus
+
+import datetime
+import requests
+import json
+import os.path
+
+from my_scopus import MY_API_KEY
+from my_scopus import ak, csa, pdv, ufo, ufo_pdv, ufo_ips, ufo_eps, ufo_apps
+from ak_scopus import get_scopus_list, get_scopus_data, get_scopus_refs
+
+
+from ak_wordpress import wordpress_post_by_scopus, wordpress_comment_by_scopus
+
+
+# Mysql persistent data (Account: scopus, $scopus$)
+import pymysql.cursors
+import pymysql
+
+db_host = 'localhost'
+db_user = 'scopus'
+db_pw = '$scopus$'
+db_name = 'scopus'
+
+log_file = '/Users/kopmann/scopus-publications.log'
+
+
+# Summary
+npubs = 0
+nnewpubs= 0
+ncites = 0
+nnewcites = 0
+
+
+
+def update_publications(authids,authname='',scopus_opts = '',max=0):
+
+
+    print "=== Update of publications for the author group: " + authname
+    #print str(authids)
+
+
+    # Connect to the database
+    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
+
+    # Request all publications of a list of authors (in one query)
+    # Result: list of records with (scopusid, eid, citedbycount)
+    # The citations could also be used later by wordpress (maybe via a plugin)
+
+    publist = get_scopus_list(authids,scopus_opts,max)
+    #publist = get_scopus_list(authids, scopus_opts, 3)
+    #publist = get_scopus_list(authids, '(PUBYEAR AFT 2014)')
+    print "Total number of publications: %d" % len(publist)
+    #print publist
+
+
+    # Save all publication to the publication database
+    try:
+        with connection.cursor() as cursor:
+            for pub in publist:
+                # 1 / Create a new record
+                #print pub # Todo: strip the prefix SCOPUS_ID?!
+                sql = "INSERT IGNORE INTO `publications` (`scopusid`,`eid`) VALUES (%s,%s)"
+                cursor.execute(sql, (pub[0],pub[1]))
+                    
+                sql = "UPDATE `publications` SET `citedbycount` = %s WHERE `scopusid` = %s"
+                cursor.execute(sql, (pub[2],pub[0]))
+
+                # 2 / Add categories
+                if len(authname) > 0:
+                    catlist = []
+                    cat = None # categories stored so far (JSON string or NULL)
+                    sql = "SELECT categories FROM publications WHERE scopusid = %s"
+                    cursor.execute(sql, (pub[0]))
+                    result = cursor.fetchall()
+                    if len(result) > 0:
+                        #print "Categories %s" % result[0]['categories']
+                        cat = result[0]['categories']
+
+                    try:
+                        catlist = json.loads(cat)
+                    except TypeError:
+                        print("No categories up to now")
+
+                    if authname not in catlist:
+                        catlist += [authname]
+                
+                    sql = "UPDATE `publications` SET `categories` = %s WHERE `scopusid` = %s"
+                    cursor.execute(sql, (json.dumps(catlist),pub[0]))
+                        
+
+            # connection is not autocommit by default. So you must commit to save
+            # your changes.
+            connection.commit()
+
+    finally:
+        connection.close()
+
+
+# Read all citations and store in the citation table
+def update_citations():
+    global npubs
+    
+    print ""
+    print "=== Update citatation of all publication in the database"
+    
+    # Connect to the database
+    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
+
+    # Loop over the publications and read all citations from scopus
+
+    # Todo: Shift to a separate script !?
+    try:
+        with connection.cursor() as cursor:
+            # Read a single record
+            sql = "SELECT wpid,eid,citedbycount,citesloaded FROM publications"
+            cursor.execute(sql)
+            result = cursor.fetchall()
+          
+            print "Total number of publications is %d" % len(result)
+            npubs = len(result)
+            #print "Npubs = %d" % npubs
+
+            for pub in result:
+                if pub['wpid'] is None:
+                    continue # no wordpress post created for this publication yet
+                wpid = int(pub['wpid'])
+                if pub['citedbycount'] is None:
+                    citedbycount = 0
+                else:
+                    citedbycount = int(pub['citedbycount'])
+                if pub['citesloaded'] is None:
+                    citesloaded = 0
+                else:
+                    citesloaded = int(pub['citesloaded'])
+
+                # read list of citations
+                if pub['eid'] and (citedbycount > citesloaded):
+                    
+                    print "Processing %d = %s previously cited by %d"  % (wpid, pub['eid'], citesloaded)
+
+                    data = get_scopus_refs(pub['eid'])
+                    #print json.dumps(data,sort_keys=True,indent=4, separators=(',', ': '))
+                    
+                    n = len(data)
+                    #print "Number of citations loaded for processing %d" % n
+                    #print data
+                    
+                    if n > 0:
+                        for cite in data:
+                            #print cite['eid'] + '  ' + cite['dc:title']
+
+                            pubstr = None
+                            try:
+                                pubstr = json.dumps(cite)
+                            except TypeError:
+                                print("Error serializing citation entry")
+
+                            # save all citations to the database
+                            # write complete scopus data of the article !?
+                            sql = "INSERT IGNORE INTO `citations` (`wpid`,`scopusid`,`eid`,`scopusdata`) VALUES (%s,%s,%s,%s)"
+                            cursor.execute(sql, (wpid,cite['dc:identifier'],cite['eid'],pubstr))
+                            connection.commit()
+
+                        # Update the number of cites for this article
+                        if n > citesloaded:
+                            print "New citations found %d -> %d" %(citesloaded,n)
+                            sql = "UPDATE `publications` SET `citesloaded`=" + str(n) + " WHERE wpid=" + str(wpid)
+                            #print sql
+                            cursor.execute(sql)
+                            connection.commit()
+
+
+    finally:
+        connection.close()
+
+
+
+
+# Create wordpress posts for all entries that have none
+def update_wp_posts():
+    global nnewpubs
+
+    print ""
+    print "=== Create posts for newly registered publication in scopus"
+
+    # Connect to the database
+    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
+        
+
+    # Todo: Shift to a separate script !?
+    try:
+        with connection.cursor() as cursor:
+            # Count all publications
+            #sql = "SELECT COUNT(id) FROM publications"
+            #cursor.execute(sql)
+            #result = cursor.fetchall()
+            #if len(result) > 0:
+            #print result[0]['COUNT(id)']
+
+            # Read a single record
+            sql = "SELECT scopusid,categories FROM publications WHERE wpid IS NULL"
+            cursor.execute(sql)
+            result = cursor.fetchall()
+            if len(result) > 0:
+                print "Number of new publications is %d" % len(result)
+                nnewpubs = len(result)
+            else:
+                print "Nothing new found"
+
+
+        # Retrieve all information required for the wordpress page
+        for pub in result:
+            print "Processing " + pub['scopusid'] + " categories " + pub['categories']
+
+            data = get_scopus_data(pub['scopusid'])
+            #print json.dumps(data, sort_keys=True, indent=4, separators=(',', ': '))
+
+            # Parse categories
+            catlist = []
+            try:
+                catlist = json.loads(pub['categories'])
+            except TypeError:
+                print("No categories specified")
+
+            wpid = wordpress_post_by_scopus(data, catlist)
+            
+            #print wpid
+            #print pub['scopusid']
+
+            # Warning: the resulting string uses double quotes (") so use
+            # single quotes (') for the sql command
+            datastr = json.dumps(data)
+            #print datastr
+
+            # Update publication database !!!
+            with connection.cursor() as cursor:
+                # Read a single record
+                #sql = "UPDATE publications SET wpid=" + str(wpid) + ",scopusdata='" + datastr + "' WHERE scopusid = '" + pub['scopusid'] + "'"
+                sql = "UPDATE publications SET wpid=" + str(wpid) + " WHERE scopusid = '" + pub['scopusid'] + "'"
+                cursor.execute(sql)
+                connection.commit()
+
+    finally:
+        connection.close()
+
+
+def update_wp_comments():
+    global ncites
+    global nnewcites
+    
+    print ""
+    print "=== Create comments for newly registered citations in scopus"
+    
+    
+    # Connect to the database
+    connection = pymysql.connect(host=db_host,user=db_user,password=db_pw,db=db_name,charset='utf8mb4',cursorclass=pymysql.cursors.DictCursor)
+    
+    
+    # Todo: Shift to a separate script !?
+    try:
+        with connection.cursor() as cursor:
+            # Count all citations
+            sql = "SELECT COUNT(id) FROM citations"
+            cursor.execute(sql)
+            result = cursor.fetchall()
+            if len(result) > 0:
+                ncites = result[0]['COUNT(id)']
+                #print result[0]['COUNT(id)']
+
+            # Read a single record
+            sql = "SELECT id,wpid,scopusdata FROM citations WHERE wpcommentid IS NULL"
+            cursor.execute(sql)
+            result = cursor.fetchall()
+            print "Number of new citations is %d" % len(result)
+            nnewcites = len(result)
+
+            for pub in result:
+                wpid = int(pub['wpid'])
+                print "Processing post " + str(wpid)
+            
+                data = []
+                try:
+                    data = json.loads(pub['scopusdata'])
+                except TypeError:
+                    print("Scopus data missing?!")
+
+                # If the creation of the comment fails, the wpcommentid 0 is
+                # written to the database. This means, there is no second try
+                # to get this citations added.
+                # All failed comments can be found by searching for wpcommentid = 0
+                #
+                wpcommentid = 0
+                try:
+                    wpcommentid = wordpress_comment_by_scopus(wpid, data)
+                except Exception:
+                    print "Error: Submission of comment failed"
+
+                sql = "UPDATE citations SET wpcommentid=" + str(wpcommentid) + " WHERE id = '" + str(pub['id']) + "'"
+                cursor.execute(sql)
+                connection.commit()
+
+    finally:
+        connection.close()
+
+
+# Todo: Add a script to save the data for all publications in the database!!!
+# There was some problem before?!
+#
+
+# Todo: Add scripts to check consistence in the database
+# and fix problems if detected
+# E.g. search for wpcommentid == 0
+# Check if wp posts + comments are still available, display
+# deleted entries
+#
+
+
+
+# Main
+
+start = datetime.datetime.now()
+
+print ""
+print "***********************************************"
+print "**** scopus-get-publications / " + start.strftime("%Y-%m-%d") + " *****"
+print "***********************************************"
+print ""
+
+
+
+# Update publication database; search for new publications
+# Loop over all author groups defined in my_scopus.py
+
+# Todo: Detect, if there is no access to scopus !!!
+#
+
+# Define the authors that should be considered
+#authors = ["Computing", ufo_pdv]
+#print authors
+
+#update_publications(ufo_pdv, "Computing", '(PUBYEAR AFT 2006)',10)
+update_publications(ufo_pdv, "Computing", '(PUBYEAR AFT 2006)')
+update_publications(ufo_ips, "X-ray Imaging", '(PUBYEAR AFT 2010)')
+update_publications(ufo_eps, "Electronics", '(PUBYEAR AFT 2010)')
+update_publications(ufo_apps, "Morphology", '(PUBYEAR AFT 2010)')
+
+
+update_wp_posts()
+
+# read all citations
+# Todo: read only new citations?!
+
+
+update_citations()
+
+
+# loop over all cites and post comments to wordpress, when necessary
+# update database
+
+update_wp_comments()
+# Todo: deactivate comments for scopus posts!!!
+
+
+# Display summary
+end = datetime.datetime.now()
+print ""
+print "Summary: (see also logfile %s) " % log_file
+print "Date       = " + str(start)
+print "NPubs      = " + str(npubs)
+print "NNewPubs   = " + str(nnewpubs)
+print "NCites     = " + str(ncites)
+print "NNewCites  = " + str(nnewcites)
+print "Runtime    = " + str(end - start)
+
+
+# Write summary to log file
+if not os.path.isfile(log_file):
+    print "Create logfile " + log_file
+    # Open file and write header
+    log = open(log_file,"w")
+    log.write(__file__ + "\n")
+    log.write("\n")
+    log.write("      Date\t    Time\tNPubs\tNNewP\tNCite\tNNewC\t          TRun\n")
+    log.write("------------------------------------------------------------------------------\n")
+    log.close()
+
+
+log = open(log_file,"a")
+log.write("%s\t%s\t%5d\t%5d\t%5d\t%5d\t%s\n" % (start.strftime("%Y-%m-%d"),
+                                    start.strftime("%H:%M:%S"),
+                                    npubs,nnewpubs,ncites,nnewcites,str(end-start)))
+log.close()
+
+
+
+# done
+
+
+
+
+

+ 6 - 0
update.sh

@@ -0,0 +1,6 @@
+# Update publications on UFO webpage by new data in Scopus
+# A Kopmann, 11.4.2017
+#
+
+python -W ignore /root/scopus/scopus-get-publications.py
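+
+# Example cron entry to run the update every morning
+# (an assumption - adjust schedule and paths as needed):
+#   0 6 * * * /root/scopus/update.sh >> /root/scopus/update.log 2>&1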
+