"""
Functions that implement the access to the Scopus database
"""

import json

import requests

from config import *  # provides MY_API_KEY


# Endpoints of the Elsevier API. HTTPS is used throughout so that the API key
# header is never sent in clear text.
_SEARCH_URL = "https://api.elsevier.com/content/search/scopus"
_ABSTRACT_URL = "https://api.elsevier.com/content/abstract/scopus_id/"

# Number of publications requested in one search call.
_PAGE_SIZE = 25

# Field lists requested from the abstract endpoint by the individual functions.
_DATA_FIELDS = (
    "article-number,title,publicationName,volume,issueIdentifier,"
    "prism:pageRange,coverDate,article-number,eid,doi,citedby-count,"
    "prism:aggregationType,url,identifier,description,authkeywords,"
    "authors,prism:issn,authkeywords"
)
_INFO_FIELDS = (
    "article-number,title,publicationName,volume,issueIdentifier,"
    "prism:pageRange,coverDate,article-number,doi,citedby-count,"
    "prism:aggregationType,url,identifier,description,authkeywords,"
    "authors,prism:issn"
)
_BRIEF_FIELDS = (
    "authors,article-number,title,publicationName,volume,issueIdentifier,"
    "prism:pageRange,coverDate,article-number,doi,citedby-count,"
    "prism:aggregationType,url,identifier,description,prism:issn"
)


def _get_json(url, params):
    """
    Send an authenticated GET request and return the decoded JSON body

    The parameters are passed to requests as a dict so that they get
    URL-encoded; search queries contain spaces, parentheses and colons and
    user supplied options may contain characters such as '&'.
    """
    resp = requests.get(url, params=params,
                        headers={'Accept': 'application/json',
                                 'X-ELS-APIKey': MY_API_KEY})
    return resp.json()


def _get_abstract(scopus_id, fields):
    """ Fetch the abstract record of one publication, limited to `fields` """
    return _get_json(_ABSTRACT_URL + scopus_id, {'field': fields})


def _search_entries(query, field=None, limit=0):
    """
    Yield the raw result entries of a Scopus search, page by page

    The search API returns a limited number of results per call, so the
    request is repeated with an increasing start offset.

    Arguments:
    query -- Scopus search query
    field -- optional comma separated list of fields to return
    limit -- maximum number of results to fetch, all results if <= 0
    """
    start = 0
    # None means: not known yet, will be set from the first response
    wanted = limit if limit > 0 else None
    while wanted is None or start < wanted:
        if wanted is None:
            count = _PAGE_SIZE
        else:
            count = min(_PAGE_SIZE, wanted - start)
        params = {'query': query, 'count': count, 'start': start}
        if field:
            params['field'] = field
        search = _get_json(_SEARCH_URL, params)['search-results']
        total = int(search['opensearch:totalResults'])
        # Never ask for more than the database reports
        wanted = total if wanted is None else min(wanted, total)
        if total > 0:
            for entry in search.get('entry', []):
                yield entry
        # Advance by the requested page size, not by the number of entries
        # received, so that an incomplete page cannot stall the loop
        start += count


def _author_names(response):
    """ Return the indexed names of the authors of an abstract response """
    authors = (response.get('authors') or {}).get('author') or []
    return [au['ce:indexed-name'] for au in authors]


def get_scopus_list(author_list, opt='', max=0):
    """
    Get the last N publications of a given author list

    Arguments:
    author_list -- a scopus author id (e.g. "35313939900") or a list of them
    opt -- additional search condition that is combined with the author
           query by AND, e.g. "PUBYEAR AFT 2010"
    max -- maximum number of publications to get; all publications are
           fetched if max <= 0

    Returns:
    list of [scopus id, electronic id, number of citations], all as strings,
    e.g. ['SCOPUS_ID:0037368024', ...]
    DOI would be desirable but is not available in some records

    Entries with missing data are reported on stdout and skipped, so the
    result can be shorter than the number of publications found.

    The result can be used in the display functions like get_scopus_brief()
    """
    if isinstance(author_list, (list, tuple)):
        query = ' OR '.join(['AU-ID(' + au + ')' for au in author_list])
    else:
        query = 'AU-ID(' + author_list + ')'
    if opt:
        query = query + " AND " + opt

    publist = []
    for entry in _search_entries(query, 'dc:identifier,citedby-count,eid', max):
        try:
            publist.append([str(entry['dc:identifier']),
                            str(entry['eid']),
                            str(entry['citedby-count'])])
        except KeyError:
            print("Warning: There is data missing")
            print(entry)
    return publist


def get_scopus_data(SCOPUS_ID):
    """
    Get complete data for a single publication

    Returns the decoded JSON response of the abstract retrieval unchanged.
    """
    return _get_abstract(SCOPUS_ID, _DATA_FIELDS)


def get_scopus_refs(EID):
    """
    Get list of all citations of a single publication

    Argument: electronic id (eid) of the publication
    Returns: list of the raw search result entries of the query refeid(EID)
    """
    return list(_search_entries('refeid(' + EID + ')'))


def get_scopus_info(SCOPUS_ID):
    """
    Get complete information from Scopus for a single publication

    Returns a text with authors, title, journal, volume, page range (or
    article number), date, DOI, number of citations and the abstract.
    Fields that are missing in the record (e.g. the DOI) are left empty.
    """
    response = _get_abstract(SCOPUS_ID, _INFO_FIELDS)['abstracts-retrieval-response']
    coredata = response['coredata']

    def text(key):
        # Missing or empty fields are formatted as empty string
        return coredata.get(key) or ''

    doi = text('prism:doi')
    # All values stay unicode text; mixing them with utf-8 encoded byte
    # strings fails for names and titles with non-ASCII characters
    fstring = (u'{authors}, {title}, {journal}, {volume}, {articlenum}, '
               u'({date}). {doi} (cited {cites} times).\n{abstract}\n\n')
    return fstring.format(
        authors=', '.join(_author_names(response)),
        title=text('dc:title'),
        journal=text('prism:publicationName'),
        volume=text('prism:volume'),
        articlenum=text('prism:pageRange') or text('article-number'),
        date=text('prism:coverDate'),
        doi='doi:' + doi if doi else '',
        cites=int(coredata.get('citedby-count') or 0),
        abstract=text('dc:description'))


def get_scopus_brief(SCOPUS_ID, max_authors=1000):
    """
    Format a single publication as one line of plain text

    Arguments:
    SCOPUS_ID -- scopus id of the publication, or a list whose first element
                 is the scopus id (as returned by get_scopus_list())
    max_authors -- publications with more authors are skipped

    Returns: the formatted publication, or '' if it has more than
    max_authors authors. More than 20 authors are abbreviated to the first
    author followed by 'et.al.'.

    Todo:
    - Implement other formats (e.g. html, bibtex)
    - Format publications as articles, Title, Abstract
    """
    scopus_id = SCOPUS_ID
    if isinstance(scopus_id, list):
        scopus_id = scopus_id[0]

    response = _get_abstract(scopus_id, _BRIEF_FIELDS)['abstracts-retrieval-response']
    coredata = response['coredata']
    names = _author_names(response)

    if len(names) > max_authors:
        return ''

    # The parts are kept as unicode text and joined once at the end
    if len(names) > 20:
        parts = [names[0] + ' et.al.: ']
    else:
        parts = [', '.join(names) + ': ']

    if coredata.get('dc:title'):
        parts.append(coredata['dc:title'])
    if coredata.get('prism:publicationName'):
        parts.append(', ' + coredata['prism:publicationName'])
    if coredata.get('prism:volume'):
        parts.append(', ' + coredata['prism:volume'])
    if coredata.get('prism:issueIdentifier'):
        parts.append(', ' + coredata['prism:issueIdentifier'])
    if coredata.get('prism:coverDate'):
        parts.append(' (' + coredata['prism:coverDate'] + ') ')
    # The article number is only used if there is no page range
    if coredata.get('prism:pageRange'):
        parts.append(coredata['prism:pageRange'])
    elif coredata.get('article-number'):
        parts.append(coredata['article-number'])
    if coredata.get('prism:doi'):
        parts.append(', doi:' + coredata['prism:doi'])
    if coredata.get('citedby-count'):
        parts.append(' (cited ' + coredata['citedby-count'] + ' times)')
    parts.append('.\n')
    return ''.join(parts)