ak_scopus.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. """ Functions that implement the access to the Scopus database """
  2. import requests
  3. import json
  4. from config import *
  5. def get_scopus_list(author_list, opt='', max=0):
  6. """ Get the last N publications of an given author list
  7. Arguments:
  8. list of scopus author ids (e.g. "35313939900")
  9. Returns:
  10. list of scopus ids, electronic id and number of citations
  11. DOI would be desirable but is not available in some records
  12. e.g. ('SCOPUS_ID:0037368024',)
  13. The result can be used in the display functions like get_scopus_brief()
  14. """
  15. count = 25 # define the number of requests publications in one call
  16. if isinstance(author_list, list):
  17. #print "Length of author list %d" % len(author_list)
  18. query = ' OR '.join(['AU-ID('+au+')' for au in author_list])
  19. else:
  20. query = 'AU-ID('+author_list+')'
  21. if len(opt) > 0:
  22. query = query + " AND " + opt
  23. # The scopus query is limited by the number of results
  24. # To get all results a loop over all results is required
  25. # The loop is controlled by count and start
  26. # limit the results by time: Arguments?
  27. # e.g. PUBYEAR AFT 2010
  28. if (max > 0) and (max < count):
  29. npubstoget = max
  30. count = max
  31. else:
  32. npubstoget = count
  33. n = 0
  34. start = 0
  35. ntotal = 0
  36. publist = []
  37. while (npubstoget > 0):
  38. loopargs = "&count=%d&start=%d" % (count, start)
  39. #print loopargs
  40. #print "Query: " + query
  41. url = ("http://api.elsevier.com/content/search/scopus?query="
  42. +query+ "&field=dc:identifier,citedby-count,eid" + loopargs)
  43. #print "URL: " + url
  44. resp = requests.get(url,
  45. headers={'Accept':'application/json',
  46. 'X-ELS-APIKey': MY_API_KEY})
  47. #print resp
  48. results = resp.json()
  49. if (n==0):
  50. n = int(results['search-results']['opensearch:totalResults'])
  51. #print "Number publications found = %d" % n
  52. if (max == 0):
  53. npubstoget = n
  54. #print json.dumps(resp.json(),
  55. # sort_keys=True,
  56. # indent=4, separators=(',', ': '))
  57. newpubs = []
  58. for r in results['search-results']["entry"]:
  59. #print r
  60. try:
  61. newpubs += [[ str(r['dc:identifier']),str(r['eid']),str(r['citedby-count'])]]
  62. except KeyError:
  63. print "Warning: There is data missing"
  64. print r
  65. # Todo : DOI is not always available !!!
  66. #newpubs = [[str(r['dc:identifier']),str(r['citedby-count'])] for r in results['search-results']["entry"]]
  67. #citations = [[str(r['citedby-count'])] for r in results['search-results']["entry"]]
  68. publist += newpubs
  69. # Todo: Counting by the list is dangerous - if an element is missing !!!
  70. nreceived = len(newpubs)
  71. nlist = len(publist)
  72. #print "Received: %d" %nreceived
  73. #print "In list= %d" %nlist
  74. # Next iteration
  75. #print "Calculating loop parameters npubstoget = %d (max = %d)" % (npubstoget, max)
  76. npubstoget = npubstoget - count
  77. start += count
  78. return publist
  79. def get_scopus_data(SCOPUS_ID):
  80. """ Get complete data for a single publication """
  81. url = ("http://api.elsevier.com/content/abstract/scopus_id/"
  82. + SCOPUS_ID
  83. + "?field=article-number,title,publicationName,volume,issueIdentifier,"
  84. + "prism:pageRange,coverDate,article-number,eid,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn,authkeywords")
  85. #print url
  86. resp = requests.get(url,
  87. headers={'Accept':'application/json',
  88. 'X-ELS-APIKey': MY_API_KEY})
  89. #results = json.loads(resp.text.encode('utf-8'))
  90. results = resp.json()
  91. return results
  92. def get_scopus_refs(EID):
  93. """ Get list of all citations of a single publication """
  94. count = 25
  95. n = 0
  96. npubstoget = 25
  97. start = 0
  98. ntotal = 0
  99. publist = []
  100. while (npubstoget > 0):
  101. loopargs = "&count=%d&start=%d" % (count, start)
  102. #print loopargs
  103. url = ("https://api.elsevier.com/content/search/scopus?query=refeid("
  104. + EID + ")" + loopargs)
  105. #print "URL: " + url
  106. resp = requests.get(url,headers={'Accept':'application/json','X-ELS-APIKey':MY_API_KEY})
  107. results = resp.json()
  108. #print json.dumps(results,sort_keys=True,indent=4, separators=(',', ': '))
  109. if (n==0):
  110. n = int(results['search-results']['opensearch:totalResults'])
  111. #print "Current number citations in scopus = %d" % n
  112. npubstoget = n
  113. if (n>0):
  114. publist += results['search-results']['entry']
  115. npubstoget = npubstoget - count
  116. start += count
  117. return publist
  118. def get_scopus_info(SCOPUS_ID):
  119. """ Get complete information from Scopus for a single publication """
  120. url = ("http://api.elsevier.com/content/abstract/scopus_id/"
  121. + SCOPUS_ID
  122. + "?field=article-number,title,publicationName,volume,issueIdentifier,"
  123. + "prism:pageRange,coverDate,article-number,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn")
  124. #print url
  125. resp = requests.get(url,
  126. headers={'Accept':'application/json',
  127. 'X-ELS-APIKey': MY_API_KEY})
  128. results = json.loads(resp.text.encode('utf-8'))
  129. #print resp
  130. #print results
  131. fstring = '{authors}, {title}, {journal}, {volume}, {articlenum}, ({date}). {doi} (cited {cites} times).\n{abstract}\n\n'
  132. return fstring.format(authors=', '.join([au['ce:indexed-name'] for au in results['abstracts-retrieval-response']['authors']['author']]),
  133. title=results['abstracts-retrieval-response']['coredata']['dc:title'].encode('utf-8'),
  134. journal=results['abstracts-retrieval-response']['coredata']['prism:publicationName'].encode('utf-8'),
  135. volume=results['abstracts-retrieval-response']['coredata']['prism:volume'].encode('utf-8'),
  136. articlenum=(results['abstracts-retrieval-response']['coredata'].get('prism:pageRange') or
  137. results['abstracts-retrieval-response']['coredata'].get('article-number')).encode('utf-8'),
  138. date=results['abstracts-retrieval-response']['coredata']['prism:coverDate'].encode('utf-8'),
  139. doi='doi:' + results['abstracts-retrieval-response']['coredata']['prism:doi'].encode('utf-8'),
  140. cites=int(results['abstracts-retrieval-response']['coredata']['citedby-count'].encode('utf-8')),
  141. abstract=results['abstracts-retrieval-response']['coredata']['dc:description'].encode('utf-8'))
  142. def get_scopus_brief(SCOPUS_ID, max_authors=1000):
  143. """ Display a list of publications in plain text format
  144. Argument:
  145. scopus id of the publication
  146. Todo:
  147. - Implement other formats (e.g. html, bibtex)
  148. - Format publications as articles, Title, Abstract
  149. """
  150. id = SCOPUS_ID
  151. if isinstance(id, list):
  152. id = id[0]
  153. url = ("http://api.elsevier.com/content/abstract/scopus_id/"
  154. + id
  155. + "?field=authors,article-number,title,publicationName,volume,issueIdentifier,"
  156. + "prism:pageRange,coverDate,article-number,doi,citedby-count,prism:aggregationType,url,identifier,description,prism:issn")
  157. #print url
  158. resp = requests.get(url,
  159. headers={'Accept':'application/json',
  160. 'X-ELS-APIKey': MY_API_KEY})
  161. results = json.loads(resp.text.encode('utf-8'))
  162. #print resp
  163. #print results
  164. coredata = results['abstracts-retrieval-response']['coredata']
  165. pub = ''
  166. authors = results['abstracts-retrieval-response']['authors']['author']
  167. #print "Number of authors: %d" %len(authors)
  168. if len(authors) > max_authors:
  169. return ''
  170. if len(authors) > 20:
  171. pub = pub + authors[0]['ce:indexed-name'] + ' et.al.: '
  172. else:
  173. pub = ', '.join([au['ce:indexed-name'] for au in authors]) + ': '
  174. try:
  175. if coredata.get('dc:title'):
  176. pub = pub + coredata.get('dc:title').encode('utf-8')
  177. except ValueError:
  178. print "!!! Error encoding title of publication !!!"
  179. #print coredata.get('dc:title')
  180. pub = pub + coredata.get('dc:title')
  181. if coredata.get('prism:publicationName'):
  182. pub = pub + ', ' + coredata.get('prism:publicationName').encode('utf-8')
  183. if coredata.get('prism:volume'):
  184. pub = pub + ', ' + coredata.get('prism:volume').encode('utf-8')
  185. if coredata.get('prism:issueIdentifier'):
  186. pub = pub + ', ' + coredata.get('prism:issueIdentifier').encode('utf-8')
  187. if coredata.get('prism:coverDate'):
  188. pub = pub + ' (' + coredata.get('prism:coverDate').encode('utf-8') + ') '
  189. if coredata.get('prism:pageRange'):
  190. pub = pub + coredata.get('prism:pageRange').encode('utf-8')
  191. elif coredata.get('article-number'):
  192. pub = pub + coredata.get('article-number').encode('utf-8')
  193. if coredata.get('prism:doi'):
  194. pub = pub + ', doi:' + coredata.get('prism:doi').encode('utf-8')
  195. if coredata.get('citedby-count'):
  196. pub = pub + ' (cited ' + coredata.get('citedby-count').encode('utf-8') + ' times)'
  197. pub = pub + '.\n'
  198. return pub