123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277 |
- """ Functions that implement the access to the Scopus database """
-
- import requests
- import json
- from config import *
def get_scopus_list(author_list, opt='', max=0):
    """ Get the publications of a given author or list of authors

    Arguments:
        author_list -- a single scopus author id (e.g. "35313939900") or a
                       list of such ids; several ids are combined with OR
        opt         -- optional additional search clause, appended to the
                       query with AND (e.g. "PUBYEAR AFT 2010")
        max         -- maximum number of publications to request;
                       0 requests everything the search reports

    Returns:
        list of [scopus id, electronic id, number of citations] entries,
        each value converted to a string (e.g. 'SCOPUS_ID:0037368024')

    DOI would be desirable but is not available in some records.
    Records that lack one of the three fields are reported on stdout and
    left out of the result.
    The result can be used in the display functions like get_scopus_brief()
    """

    page_size = 25  # number of publications requested in one call

    if isinstance(author_list, list):
        query = ' OR '.join(['AU-ID(' + au + ')' for au in author_list])
    else:
        query = 'AU-ID(' + author_list + ')'
    if opt:
        query = query + " AND " + opt

    # The scopus query is limited by the number of results per call.
    # To get all results a loop over all result pages is required;
    # the loop is controlled by count and start.
    publist = []
    start = 0
    total_known = False
    # Provisional value that only has to trigger the first request; it is
    # replaced as soon as the search has reported its total result count.
    remaining = max if max > 0 else page_size
    while remaining > 0:
        # Never ask for more than is still wanted, so that a limit larger
        # than one page (e.g. max=30) is honoured across several calls.
        count = min(page_size, remaining)
        # Let requests build the query string so that the blanks and
        # parentheses of the search expression are escaped properly.
        resp = requests.get(
            "https://api.elsevier.com/content/search/scopus",
            params={'query': query,
                    'field': 'dc:identifier,citedby-count,eid',
                    'count': count,
                    'start': start},
            headers={'Accept': 'application/json',
                     'X-ELS-APIKey': MY_API_KEY})
        search_results = resp.json()['search-results']

        if not total_known:
            total = int(search_results['opensearch:totalResults'])
            # Without a limit fetch everything; with a limit do not page
            # past the end of the result set.
            remaining = total if max == 0 else min(max, total)
            total_known = True

        for r in search_results.get('entry', []):
            try:
                publist.append([str(r['dc:identifier']),
                                str(r['eid']),
                                str(r['citedby-count'])])
            except KeyError:
                print("Warning: There is data missing")
                print(r)

        # Advance by the requested page size, not by the number of usable
        # records: skipped records must not shift the next start offset.
        remaining -= count
        start += count
    return publist
def get_scopus_data(SCOPUS_ID):
    """ Get complete data for a single publication

    Arguments:
        SCOPUS_ID -- scopus id of the publication as a string; it is
                     appended to the abstract retrieval URL unchanged

    Returns:
        the decoded JSON document of the response
    """

    fields = ("article-number,title,publicationName,volume,issueIdentifier,"
              "prism:pageRange,coverDate,article-number,eid,doi,citedby-count,"
              "prism:aggregationType,url,identifier,description,authkeywords,"
              "authors,prism:issn,authkeywords")
    # https keeps the API key out of clear text and matches the scheme
    # already used by get_scopus_refs().
    resp = requests.get(
        "https://api.elsevier.com/content/abstract/scopus_id/" + SCOPUS_ID,
        params={'field': fields},
        headers={'Accept': 'application/json',
                 'X-ELS-APIKey': MY_API_KEY})
    return resp.json()
def get_scopus_refs(EID):
    """ Collect all search entries found for the query refeid(EID)

    Arguments:
        EID -- electronic id of the publication as a string

    Returns:
        list with the 'entry' records of every result page, exactly as
        delivered by the search (presumably the documents citing the
        publication -- confirm against the Scopus search documentation)
    """
    page_size = 25
    base_url = ("https://api.elsevier.com/content/search/scopus?query=refeid("
                + EID + ")")
    request_headers = {'Accept': 'application/json',
                       'X-ELS-APIKey': MY_API_KEY}

    citing = []
    total = 0
    offset = 0
    remaining = page_size  # any positive value: forces the first request
    while remaining > 0:
        paging = "&count=%d&start=%d" % (page_size, offset)
        results = requests.get(base_url + paging,
                               headers=request_headers).json()
        if total == 0:
            # The first answer tells how many records exist in total
            total = int(results['search-results']['opensearch:totalResults'])
            remaining = total
        if total > 0:
            citing.extend(results['search-results']['entry'])
        remaining -= page_size
        offset += page_size
    return citing
def get_scopus_info(SCOPUS_ID):
    """ Get complete information from Scopus for a single publication

    Arguments:
        SCOPUS_ID -- scopus id of the publication as a string

    Returns:
        text with authors, title, journal, volume, page range (or article
        number), cover date, doi and citation count on the first line,
        followed by the abstract and an empty line.
        Fields missing in the record are left empty; a missing citation
        count is shown as 0.
    """

    fields = ("article-number,title,publicationName,volume,issueIdentifier,"
              "prism:pageRange,coverDate,article-number,doi,citedby-count,"
              "prism:aggregationType,url,identifier,description,authkeywords,"
              "authors,prism:issn")
    resp = requests.get(
        "https://api.elsevier.com/content/abstract/scopus_id/" + SCOPUS_ID,
        params={'field': fields},
        headers={'Accept': 'application/json',
                 'X-ELS-APIKey': MY_API_KEY})
    response = resp.json()['abstracts-retrieval-response']
    coredata = response['coredata']
    # Tolerate a record without an author list
    authors = (response.get('authors') or {}).get('author') or []

    doi = coredata.get('prism:doi')

    # All values stay text (unicode) from the decoded JSON to the result.
    # Encoding single fields to UTF-8 bytes and mixing them with text
    # fails for every non-ASCII name or title.
    fstring = (u'{authors}, {title}, {journal}, {volume}, {articlenum}, '
               u'({date}). {doi} (cited {cites} times).\n{abstract}\n\n')
    return fstring.format(
        authors=u', '.join([au['ce:indexed-name'] for au in authors]),
        title=coredata.get('dc:title') or u'',
        journal=coredata.get('prism:publicationName') or u'',
        volume=coredata.get('prism:volume') or u'',
        articlenum=(coredata.get('prism:pageRange')
                    or coredata.get('article-number')
                    or u''),
        date=coredata.get('prism:coverDate') or u'',
        doi=(u'doi:' + doi) if doi else u'',
        cites=int(coredata.get('citedby-count') or 0),
        abstract=coredata.get('dc:description') or u'')
def get_scopus_brief(SCOPUS_ID, max_authors=1000):
    """ Format a single publication as one line of plain text

    Arguments:
        SCOPUS_ID   -- scopus id of the publication, or a list whose first
                       element is the scopus id (like the entries returned
                       by get_scopus_list())
        max_authors -- publications with more authors than this are
                       skipped

    Returns:
        the formatted publication terminated by ".\\n", or an empty string
        if the publication has more than max_authors authors.
        With more than 20 authors only the first one is named.

    Todo:
        - Implement other formats (e.g. html, bibtex)
        - Format publications as articles, Title, Abstract
    """

    scopus_id = SCOPUS_ID
    if isinstance(scopus_id, list):
        scopus_id = scopus_id[0]

    fields = ("authors,article-number,title,publicationName,volume,"
              "issueIdentifier,prism:pageRange,coverDate,article-number,doi,"
              "citedby-count,prism:aggregationType,url,identifier,"
              "description,prism:issn")
    resp = requests.get(
        "https://api.elsevier.com/content/abstract/scopus_id/" + scopus_id,
        params={'field': fields},
        headers={'Accept': 'application/json',
                 'X-ELS-APIKey': MY_API_KEY})
    response = resp.json()['abstracts-retrieval-response']
    coredata = response['coredata']
    # Tolerate a record without an author list
    authors = (response.get('authors') or {}).get('author') or []

    if len(authors) > max_authors:
        return ''

    # Every piece stays text (unicode) as decoded from the JSON response.
    # Appending UTF-8 encoded bytes to the author text fails as soon as a
    # field contains a non-ASCII character.
    if len(authors) > 20:
        parts = [authors[0]['ce:indexed-name'], u' et.al.: ']
    else:
        parts = [u', '.join([au['ce:indexed-name'] for au in authors]),
                 u': ']

    if coredata.get('dc:title'):
        parts.append(u'{0}'.format(coredata['dc:title']))
    for key in ('prism:publicationName', 'prism:volume',
                'prism:issueIdentifier'):
        if coredata.get(key):
            parts.append(u', {0}'.format(coredata[key]))

    if coredata.get('prism:coverDate'):
        parts.append(u' ({0}) '.format(coredata['prism:coverDate']))
    # The article number is only used when no page range is available
    if coredata.get('prism:pageRange'):
        parts.append(u'{0}'.format(coredata['prism:pageRange']))
    elif coredata.get('article-number'):
        parts.append(u'{0}'.format(coredata['article-number']))
    if coredata.get('prism:doi'):
        parts.append(u', doi:{0}'.format(coredata['prism:doi']))
    if coredata.get('citedby-count'):
        parts.append(u' (cited {0} times)'.format(coredata['citedby-count']))
    parts.append(u'.\n')
    return u''.join(parts)
|