# csa / scopus
""" Functions that implement the access to the Scopus database """
    

import requests
import json

from config import *


def get_scopus_list(author_list, opt='', max=0):
    """ Get the publications of a given author or author list

    Arguments:
        author_list -- a single scopus author id (e.g. "35313939900") or a
                       list of such ids; the ids of a list are combined
                       with OR
        opt         -- optional search clause that is appended to the query
                       with AND (e.g. "PUBYEAR AFT 2010")
        max         -- maximum number of publications to request;
                       0 (default) or a negative value requests all
                       publications found. (The name shadows the builtin
                       max() and is only kept for compatibility.)

    Returns:
        list of [scopus id, electronic id, number of citations], all three
        converted to strings; the scopus id looks like
        'SCOPUS_ID:0037368024'. Records where one of the three fields is
        missing are reported on stdout and left out of the list.

    DOI would be desirable but is not available in some records.
    The result can be used in the display functions like get_scopus_brief()
    """

    page_size = 25  # number of publications requested in one call

    if isinstance(author_list, list):
        query = ' OR '.join(['AU-ID(' + au + ')' for au in author_list])
    else:
        query = 'AU-ID(' + author_list + ')'

    if opt:
        query = query + " AND " + opt

    # The scopus query is limited by the number of results per call.
    # To get all results a loop over all result pages is required.
    # The loop is controlled by count (page length) and start (offset).

    limit = max if max > 0 else 0  # 0 means "no limit"

    publist = []
    start = 0
    wanted = None  # number of publications to request; known after 1st call

    while wanted is None or start < wanted:

        if wanted is None:
            count = min(page_size, limit) if limit else page_size
        else:
            # Never request more than what is still missing, so that the
            # last page does not exceed the requested maximum
            count = min(page_size, wanted - start)

        # Passing the arguments via params lets requests URL-encode the
        # search expression (blanks, parentheses, '&' inside opt, ...)
        resp = requests.get(
            "https://api.elsevier.com/content/search/scopus",
            params={'query': query,
                    'field': 'dc:identifier,citedby-count,eid',
                    'count': count,
                    'start': start},
            headers={'Accept': 'application/json',
                     'X-ELS-APIKey': MY_API_KEY})
        search = resp.json()['search-results']

        if wanted is None:
            total = int(search['opensearch:totalResults'])
            wanted = min(limit, total) if limit else total
            if wanted == 0:
                # Nothing found: there are no publication records to read
                break

        for r in search.get("entry", []):
            try:
                publist.append([str(r['dc:identifier']),
                                str(r['eid']),
                                str(r['citedby-count'])])
            except KeyError:
                print("Warning: There is data missing")
                print(r)

        # Next iteration: advance by the requested page length and not by
        # the number of stored records, because incomplete records are
        # skipped above
        start += count

    return publist


def get_scopus_data(SCOPUS_ID):
    """ Get complete data for a single publication

    Arguments:
        SCOPUS_ID -- scopus id of the publication as a string; it is
                     inserted into the request URL as given

    Returns:
        the decoded JSON answer of the abstract retrieval request
    """

    # Each field is requested once (the list used to name article-number
    # and authkeywords twice)
    fields = ("article-number,title,publicationName,volume,issueIdentifier,"
              "prism:pageRange,coverDate,eid,doi,citedby-count,"
              "prism:aggregationType,url,identifier,description,"
              "authkeywords,authors,prism:issn")

    # https: the API key in the request header must not travel in clear text
    url = ("https://api.elsevier.com/content/abstract/scopus_id/"
           + SCOPUS_ID
           + "?field=" + fields)

    resp = requests.get(url,
                        headers={'Accept': 'application/json',
                                 'X-ELS-APIKey': MY_API_KEY})
    return resp.json()


def get_scopus_refs(EID):
    """ Get list of all citations of a single publication

    Arguments:
        EID -- electronic id of the publication as a string; it is inserted
               into the refeid() search expression as given

    Returns:
        list with the 'entry' elements of all result pages; empty list if
        the total number of results reported by the first page is not
        positive
    """

    page_size = 25
    base = "https://api.elsevier.com/content/search/scopus?query=refeid("
    request_headers = {'Accept': 'application/json',
                       'X-ELS-APIKey': MY_API_KEY}

    def fetch_page(offset):
        # One search call; hands back the 'search-results' part of the answer
        paging = "&count=%d&start=%d" % (page_size, offset)
        answer = requests.get(base + EID + ")" + paging,
                              headers=request_headers)
        return answer.json()['search-results']

    # The first page is always requested; it tells how many results exist
    first_page = fetch_page(0)
    total = int(first_page['opensearch:totalResults'])
    if total <= 0:
        return []

    citing = []
    citing += first_page['entry']

    # Remaining pages: offsets 25, 50, ... below the total number of results
    for offset in range(page_size, total, page_size):
        citing += fetch_page(offset)['entry']

    return citing


def get_scopus_info(SCOPUS_ID):
    """ Get complete information from Scopus for a single publication

    Arguments:
        SCOPUS_ID -- scopus id of the publication as a string; it is
                     inserted into the request URL as given

    Returns:
        a text block with authors, title, journal, volume, page range (or
        article number), cover date, DOI and citation count on the first
        line, the abstract on the second line, followed by an empty line.
        Fields that are missing in the record are left empty (the DOI is
        not available in some records); a missing citation count is
        shown as 0.
        The result is a native str: UTF-8 encoded bytes under Python 2,
        text under Python 3.
    """

    url = ("https://api.elsevier.com/content/abstract/scopus_id/"
           + SCOPUS_ID
           + "?field=article-number,title,publicationName,volume,issueIdentifier,"
           + "prism:pageRange,coverDate,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn")
    resp = requests.get(url,
                        headers={'Accept': 'application/json',
                                 'X-ELS-APIKey': MY_API_KEY})
    results = resp.json()

    record = results['abstracts-retrieval-response']
    coredata = record['coredata']
    # The authors element may be absent or null
    authors = (record.get('authors') or {}).get('author') or []

    def text_of(key):
        # Missing or null fields become an empty text
        return coredata.get(key) or u''

    doi = text_of('prism:doi')

    # All values are kept as text until the very end. Mixing text with
    # encoded byte strings fails as soon as a non-ASCII character shows up.
    fstring = u'{authors}, {title}, {journal}, {volume}, {articlenum}, ({date}). {doi} (cited {cites} times).\n{abstract}\n\n'

    info = fstring.format(
        authors=u', '.join([au['ce:indexed-name'] for au in authors]),
        title=text_of('dc:title'),
        journal=text_of('prism:publicationName'),
        volume=text_of('prism:volume'),
        articlenum=text_of('prism:pageRange') or text_of('article-number'),
        date=text_of('prism:coverDate'),
        doi=(u'doi:' + doi) if doi else u'',
        cites=int(coredata.get('citedby-count') or 0),
        abstract=text_of('dc:description'))

    if not isinstance(info, str):
        # Python 2: keep returning an UTF-8 encoded byte string as before
        info = info.encode('utf-8')
    return info


def get_scopus_brief(SCOPUS_ID, max_authors=1000):
    """ Format a single publication as one line of plain text

    Arguments:
        SCOPUS_ID   -- scopus id of the publication as a string, or a list
                       whose first element is the scopus id (e.g. one
                       element of the result of get_scopus_list())
        max_authors -- publications with more authors are skipped

    Returns:
        text of the form
        "authors: title, journal, volume, issue (date) pages, doi:... (cited N times)."
        terminated by a newline; parts that are missing in the record are
        left out. More than 20 authors are shortened to the first author
        followed by "et.al.". An empty string is returned if the
        publication has more than max_authors authors.

    Todo:
        - Implement other formats (e.g. html, bibtex)
        - Format publications as articles, Title, Abstract
    """

    scopus_id = SCOPUS_ID
    if isinstance(scopus_id, list):
        scopus_id = scopus_id[0]

    url = ("https://api.elsevier.com/content/abstract/scopus_id/"
           + scopus_id
           + "?field=authors,article-number,title,publicationName,volume,issueIdentifier,"
           + "prism:pageRange,coverDate,article-number,doi,citedby-count,prism:aggregationType,url,identifier,description,prism:issn")

    resp = requests.get(url,
                        headers={'Accept': 'application/json',
                                 'X-ELS-APIKey': MY_API_KEY})
    results = resp.json()

    record = results['abstracts-retrieval-response']
    coredata = record['coredata']
    # The authors element may be absent or null
    authors = (record.get('authors') or {}).get('author') or []

    if len(authors) > max_authors:
        return ''

    if len(authors) > 20:
        pub = authors[0]['ce:indexed-name'] + ' et.al.: '
    else:
        pub = ', '.join([au['ce:indexed-name'] for au in authors]) + ': '

    # All parts are appended as text without encoding them first. Mixing
    # text with encoded byte strings fails as soon as a title or journal
    # name contains a non-ASCII character.
    title = coredata.get('dc:title')
    if title:
        pub = pub + title

    for key in ('prism:publicationName', 'prism:volume',
                'prism:issueIdentifier'):
        value = coredata.get(key)
        if value:
            pub = pub + ', ' + value

    cover_date = coredata.get('prism:coverDate')
    if cover_date:
        pub = pub + ' (' + cover_date + ') '

    # The page range is preferred; the article number is the fallback
    pages = coredata.get('prism:pageRange') or coredata.get('article-number')
    if pages:
        pub = pub + pages

    doi = coredata.get('prism:doi')
    if doi:
        pub = pub + ', doi:' + doi

    cites = coredata.get('citedby-count')
    if cites:
        pub = pub + ' (cited %s times)' % cites

    return pub + '.\n'