ak_scopus.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. """ Functions that implement the access to the Scopus database """
  2. import requests
  3. import json
  4. from pprint import pprint
  5. from config import *
  6. def get_scopus_list(author_list, opt='', max=0):
  7. """ Get the last N publications of an given author list
  8. Arguments:
  9. list of scopus author ids (e.g. "35313939900")
  10. Returns:
  11. list of scopus ids, electronic id and number of citations
  12. DOI would be desirable but is not available in some records
  13. e.g. ('SCOPUS_ID:0037368024',)
  14. The result can be used in the display functions like get_scopus_brief()
  15. """
  16. count = 25 # define the number of requests publications in one call
  17. if isinstance(author_list, list):
  18. #print "Length of author list %d" % len(author_list)
  19. query = ' OR '.join(['AU-ID('+au+')' for au in author_list])
  20. else:
  21. query = 'AU-ID('+author_list+')'
  22. if len(opt) > 0:
  23. query = query + " AND " + opt
  24. # The scopus query is limited by the number of results
  25. # To get all results a loop over all results is required
  26. # The loop is controlled by count and start
  27. # limit the results by time: Arguments?
  28. # e.g. PUBYEAR AFT 2010
  29. if (max > 0) and (max < count):
  30. npubstoget = max
  31. count = max
  32. else:
  33. npubstoget = count
  34. n = 0
  35. start = 0
  36. ntotal = 0
  37. publist = []
  38. while (npubstoget > 0):
  39. loopargs = "&count=%d&start=%d" % (count, start)
  40. #print loopargs
  41. #print "Query: " + query
  42. url = ("http://api.elsevier.com/content/search/scopus?query="
  43. +query+ "&field=dc:identifier,citedby-count,eid" + loopargs)
  44. #print "URL: " + url
  45. resp = requests.get(url,
  46. headers={'Accept':'application/json',
  47. 'X-ELS-APIKey': MY_API_KEY})
  48. #print resp
  49. results = resp.json()
  50. if (n==0):
  51. n = int(results['search-results']['opensearch:totalResults'])
  52. #print "Number publications found = %d" % n
  53. if (max == 0):
  54. npubstoget = n
  55. #print json.dumps(resp.json(),
  56. # sort_keys=True,
  57. # indent=4, separators=(',', ': '))
  58. newpubs = []
  59. for r in results['search-results']["entry"]:
  60. #print r
  61. try:
  62. newpubs += [[ str(r['dc:identifier']),str(r['eid']),str(r['citedby-count'])]]
  63. except KeyError:
  64. print "Warning: There is data missing"
  65. print r
  66. # Todo : DOI is not always available !!!
  67. #newpubs = [[str(r['dc:identifier']),str(r['citedby-count'])] for r in results['search-results']["entry"]]
  68. #citations = [[str(r['citedby-count'])] for r in results['search-results']["entry"]]
  69. publist += newpubs
  70. # Todo: Counting by the list is dangerous - if an element is missing !!!
  71. nreceived = len(newpubs)
  72. nlist = len(publist)
  73. #print "Received: %d" %nreceived
  74. #print "In list= %d" %nlist
  75. # Next iteration
  76. #print "Calculating loop parameters npubstoget = %d (max = %d)" % (npubstoget, max)
  77. npubstoget = npubstoget - count
  78. start += count
  79. return publist
  80. def get_scopus_data(SCOPUS_ID):
  81. """ Get complete data for a single publication """
  82. url = ("http://api.elsevier.com/content/abstract/scopus_id/"
  83. + SCOPUS_ID
  84. + "?field=article-number,title,publicationName,volume,issueIdentifier,"
  85. + "prism:pageRange,coverDate,article-number,eid,doi,citedby-count,"
  86. + "prism:aggregationType,url,identifier,description,authkeywords,"
  87. + "authors,prism:issn,idxterms")
  88. #print url
  89. resp = requests.get(url,
  90. headers={'Accept':'application/json',
  91. 'X-ELS-APIKey': MY_API_KEY})
  92. #results = json.loads(resp.text.encode('utf-8'))
  93. results = resp.json()
  94. return results
  95. def get_scopus_refs(EID):
  96. """ Get list of all citations of a single publication """
  97. count = 25
  98. n = 0
  99. npubstoget = 25
  100. start = 0
  101. ntotal = 0
  102. publist = []
  103. while (npubstoget > 0):
  104. loopargs = "&count=%d&start=%d" % (count, start)
  105. #print loopargs
  106. url = ("https://api.elsevier.com/content/search/scopus?query=refeid("
  107. + EID + ")" + loopargs)
  108. #print "URL: " + url
  109. resp = requests.get(url,headers={'Accept':'application/json','X-ELS-APIKey':MY_API_KEY})
  110. results = resp.json()
  111. #pprint (json.dumps(results,sort_keys=True,indent=4, separators=(',', ': ')))
  112. try:
  113. if (n==0):
  114. n = int(results['search-results']['opensearch:totalResults'])
  115. #print "Current number citations in scopus = %d" % n
  116. npubstoget = n
  117. if (n>0):
  118. publist += results['search-results']['entry']
  119. npubstoget = npubstoget - count
  120. start += count
  121. except:
  122. print "Error:"
  123. pprint (json.dumps(results,sort_keys=True,indent=4, separators=(',', ': ')))
  124. return publist
  125. def get_scopus_info(SCOPUS_ID):
  126. """ Get complete information from Scopus for a single publication """
  127. url = ("http://api.elsevier.com/content/abstract/scopus_id/"
  128. + SCOPUS_ID
  129. + "?field=article-number,title,publicationName,volume,issueIdentifier,"
  130. + "prism:pageRange,coverDate,article-number,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn")
  131. #print url
  132. resp = requests.get(url,
  133. headers={'Accept':'application/json',
  134. 'X-ELS-APIKey': MY_API_KEY})
  135. results = json.loads(resp.text.encode('utf-8'))
  136. #print resp
  137. #print results
  138. fstring = '{authors}, {title}, {journal}, {volume}, {articlenum}, ({date}). {doi} (cited {cites} times).\n{abstract}\n\n'
  139. return fstring.format(authors=', '.join([au['ce:indexed-name'] for au in results['abstracts-retrieval-response']['authors']['author']]),
  140. title=results['abstracts-retrieval-response']['coredata']['dc:title'].encode('utf-8'),
  141. journal=results['abstracts-retrieval-response']['coredata']['prism:publicationName'].encode('utf-8'),
  142. volume=results['abstracts-retrieval-response']['coredata']['prism:volume'].encode('utf-8'),
  143. articlenum=(results['abstracts-retrieval-response']['coredata'].get('prism:pageRange') or
  144. results['abstracts-retrieval-response']['coredata'].get('article-number')).encode('utf-8'),
  145. date=results['abstracts-retrieval-response']['coredata']['prism:coverDate'].encode('utf-8'),
  146. doi='doi:' + results['abstracts-retrieval-response']['coredata']['prism:doi'].encode('utf-8'),
  147. cites=int(results['abstracts-retrieval-response']['coredata']['citedby-count'].encode('utf-8')),
  148. abstract=results['abstracts-retrieval-response']['coredata']['dc:description'].encode('utf-8'))
  149. def get_scopus_brief(SCOPUS_ID, max_authors=1000):
  150. """ Display a list of publications in plain text format
  151. Argument:
  152. scopus id of the publication
  153. Todo:
  154. - Implement other formats (e.g. html, bibtex)
  155. - Format publications as articles, Title, Abstract
  156. """
  157. id = SCOPUS_ID
  158. if isinstance(id, list):
  159. id = id[0]
  160. url = ("http://api.elsevier.com/content/abstract/scopus_id/"
  161. + id
  162. + "?field=authors,article-number,title,publicationName,volume,issueIdentifier,"
  163. + "prism:pageRange,coverDate,article-number,doi,citedby-count,prism:aggregationType,url,identifier,description,prism:issn")
  164. #print url
  165. resp = requests.get(url,
  166. headers={'Accept':'application/json',
  167. 'X-ELS-APIKey': MY_API_KEY})
  168. results = json.loads(resp.text.encode('utf-8'))
  169. #print resp
  170. #print results
  171. coredata = results['abstracts-retrieval-response']['coredata']
  172. pub = ''
  173. authors = results['abstracts-retrieval-response']['authors']['author']
  174. #print "Number of authors: %d" %len(authors)
  175. if len(authors) > max_authors:
  176. return ''
  177. if len(authors) > 20:
  178. pub = pub + authors[0]['ce:indexed-name'] + ' et.al.: '
  179. else:
  180. pub = ', '.join([au['ce:indexed-name'] for au in authors]) + ': '
  181. try:
  182. if coredata.get('dc:title'):
  183. pub = pub + coredata.get('dc:title').encode('utf-8')
  184. except ValueError:
  185. print "!!! Error encoding title of publication !!!"
  186. #print coredata.get('dc:title')
  187. pub = pub + coredata.get('dc:title')
  188. if coredata.get('prism:publicationName'):
  189. pub = pub + ', ' + coredata.get('prism:publicationName').encode('utf-8')
  190. if coredata.get('prism:volume'):
  191. pub = pub + ', ' + coredata.get('prism:volume').encode('utf-8')
  192. if coredata.get('prism:issueIdentifier'):
  193. pub = pub + ', ' + coredata.get('prism:issueIdentifier').encode('utf-8')
  194. if coredata.get('prism:coverDate'):
  195. pub = pub + ' (' + coredata.get('prism:coverDate').encode('utf-8') + ') '
  196. if coredata.get('prism:pageRange'):
  197. pub = pub + coredata.get('prism:pageRange').encode('utf-8')
  198. elif coredata.get('article-number'):
  199. pub = pub + coredata.get('article-number').encode('utf-8')
  200. if coredata.get('prism:doi'):
  201. pub = pub + ', doi:' + coredata.get('prism:doi').encode('utf-8')
  202. if coredata.get('citedby-count'):
  203. pub = pub + ' (cited ' + coredata.get('citedby-count').encode('utf-8') + ' times)'
  204. pub = pub + '.\n'
  205. return pub