ak_scopus.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. import requests
  2. import json
  3. from my_scopus import MY_API_KEY
  4. #
  5. # Get the last N publications of a given author list
  6. #
  7. # Arguments: list of scopus author ids (e.g. "35313939900")
  8. # Returns: list of scopus ids, electronic id and number of citations
  9. # DOI would be desirable but is not available in some records
  10. # e.g. ('SCOPUS_ID:0037368024',)
  11. # The result can be used in the display functions like get_scopus_brief()
  12. #
  13. def get_scopus_list(author_list, opt='', max=0):
  14. count = 25 # define the number of requests publications in one call
  15. if isinstance(author_list, list):
  16. #print "Length of author list %d" % len(author_list)
  17. query = ' OR '.join(['AU-ID('+au+')' for au in author_list])
  18. else:
  19. query = 'AU-ID('+author_list+')'
  20. if len(opt) > 0:
  21. query = query + " AND " + opt
  22. # The scopus query is limited by the number of results
  23. # To get all results a loop over all results is required
  24. # The loop is controlled by count and start
  25. # limit the results by time: Arguments?
  26. # e.g. PUBYEAR AFT 2010
  27. if (max > 0) and (max < count):
  28. npubstoget = max
  29. count = max
  30. else:
  31. npubstoget = count
  32. n = 0
  33. start = 0
  34. ntotal = 0
  35. publist = []
  36. while (npubstoget > 0):
  37. loopargs = "&count=%d&start=%d" % (count, start)
  38. #print loopargs
  39. #print "Query: " + query
  40. url = ("http://api.elsevier.com/content/search/scopus?query="
  41. +query+ "&field=dc:identifier,citedby-count,eid" + loopargs)
  42. #print "URL: " + url
  43. resp = requests.get(url,
  44. headers={'Accept':'application/json',
  45. 'X-ELS-APIKey': MY_API_KEY})
  46. #print resp
  47. results = resp.json()
  48. if (n==0):
  49. n = int(results['search-results']['opensearch:totalResults'])
  50. #print "Number publications found = %d" % n
  51. if (max == 0):
  52. npubstoget = n
  53. #print json.dumps(resp.json(),
  54. # sort_keys=True,
  55. # indent=4, separators=(',', ': '))
  56. newpubs = []
  57. for r in results['search-results']["entry"]:
  58. #print r
  59. try:
  60. newpubs += [[ str(r['dc:identifier']),str(r['eid']),str(r['citedby-count'])]]
  61. except KeyError:
  62. print "Warning: There is data missing"
  63. print r
  64. # Todo : DOI is not always available !!!
  65. #newpubs = [[str(r['dc:identifier']),str(r['citedby-count'])] for r in results['search-results']["entry"]]
  66. #citations = [[str(r['citedby-count'])] for r in results['search-results']["entry"]]
  67. publist += newpubs
  68. # Todo: Counting by the list is dangerous - if an element is missing !!!
  69. nreceived = len(newpubs)
  70. nlist = len(publist)
  71. #print "Received: %d" %nreceived
  72. #print "In list= %d" %nlist
  73. # Next iteration
  74. #print "Calculating loop parameters npubstoget = %d (max = %d)" % (npubstoget, max)
  75. npubstoget = npubstoget - count
  76. start += count
  77. return publist
  78. def get_scopus_data(SCOPUS_ID):
  79. url = ("http://api.elsevier.com/content/abstract/scopus_id/"
  80. + SCOPUS_ID
  81. + "?field=article-number,title,publicationName,volume,issueIdentifier,"
  82. + "prism:pageRange,coverDate,article-number,eid,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn")
  83. #print url
  84. resp = requests.get(url,
  85. headers={'Accept':'application/json',
  86. 'X-ELS-APIKey': MY_API_KEY})
  87. #results = json.loads(resp.text.encode('utf-8'))
  88. results = resp.json()
  89. return results
  90. def get_scopus_refs(EID):
  91. # Todo: implement loop, if there are more than 25 citations !!!
  92. #
  93. count = 25
  94. n = 0
  95. npubstoget = 25
  96. start = 0
  97. ntotal = 0
  98. publist = []
  99. while (npubstoget > 0):
  100. loopargs = "&count=%d&start=%d" % (count, start)
  101. #print loopargs
  102. url = ("https://api.elsevier.com/content/search/scopus?query=refeid("
  103. + EID + ")" + loopargs)
  104. #print "URL: " + url
  105. resp = requests.get(url,headers={'Accept':'application/json','X-ELS-APIKey':MY_API_KEY})
  106. results = resp.json()
  107. #print json.dumps(results,sort_keys=True,indent=4, separators=(',', ': '))
  108. if (n==0):
  109. n = int(results['search-results']['opensearch:totalResults'])
  110. #print "Current number citations in scopus = %d" % n
  111. npubstoget = n
  112. if (n>0):
  113. publist += results['search-results']['entry']
  114. npubstoget = npubstoget - count
  115. start += count
  116. return publist
  117. def get_scopus_info(SCOPUS_ID):
  118. url = ("http://api.elsevier.com/content/abstract/scopus_id/"
  119. + SCOPUS_ID
  120. + "?field=article-number,title,publicationName,volume,issueIdentifier,"
  121. + "prism:pageRange,coverDate,article-number,doi,citedby-count,prism:aggregationType,url,identifier,description,authkeywords,authors,prism:issn")
  122. #print url
  123. resp = requests.get(url,
  124. headers={'Accept':'application/json',
  125. 'X-ELS-APIKey': MY_API_KEY})
  126. results = json.loads(resp.text.encode('utf-8'))
  127. #print resp
  128. #print results
  129. fstring = '{authors}, {title}, {journal}, {volume}, {articlenum}, ({date}). {doi} (cited {cites} times).\n{abstract}\n\n'
  130. return fstring.format(authors=', '.join([au['ce:indexed-name'] for au in results['abstracts-retrieval-response']['authors']['author']]),
  131. title=results['abstracts-retrieval-response']['coredata']['dc:title'].encode('utf-8'),
  132. journal=results['abstracts-retrieval-response']['coredata']['prism:publicationName'].encode('utf-8'),
  133. volume=results['abstracts-retrieval-response']['coredata']['prism:volume'].encode('utf-8'),
  134. articlenum=(results['abstracts-retrieval-response']['coredata'].get('prism:pageRange') or
  135. results['abstracts-retrieval-response']['coredata'].get('article-number')).encode('utf-8'),
  136. date=results['abstracts-retrieval-response']['coredata']['prism:coverDate'].encode('utf-8'),
  137. doi='doi:' + results['abstracts-retrieval-response']['coredata']['prism:doi'].encode('utf-8'),
  138. cites=int(results['abstracts-retrieval-response']['coredata']['citedby-count'].encode('utf-8')),
  139. abstract=results['abstracts-retrieval-response']['coredata']['dc:description'].encode('utf-8'))
  140. #
  141. # Display a list of publications in plain text format
  142. #
  143. # Argument: scopus id of the publication
  144. #
  145. # Todo: Implement other formats (e.g. html, bibtex)
  146. # Format publications as articles, Title, Abstract
  147. #
  148. def get_scopus_brief(SCOPUS_ID, max_authors=1000):
  149. id = SCOPUS_ID
  150. if isinstance(id, list):
  151. id = id[0]
  152. url = ("http://api.elsevier.com/content/abstract/scopus_id/"
  153. + id
  154. + "?field=authors,article-number,title,publicationName,volume,issueIdentifier,"
  155. + "prism:pageRange,coverDate,article-number,doi,citedby-count,prism:aggregationType,url,identifier,description,prism:issn")
  156. #print url
  157. resp = requests.get(url,
  158. headers={'Accept':'application/json',
  159. 'X-ELS-APIKey': MY_API_KEY})
  160. results = json.loads(resp.text.encode('utf-8'))
  161. #print resp
  162. #print results
  163. coredata = results['abstracts-retrieval-response']['coredata']
  164. pub = ''
  165. authors = results['abstracts-retrieval-response']['authors']['author']
  166. #print "Number of authors: %d" %len(authors)
  167. if len(authors) > max_authors:
  168. return ''
  169. if len(authors) > 20:
  170. pub = pub + authors[0]['ce:indexed-name'] + ' et.al.: '
  171. else:
  172. pub = ', '.join([au['ce:indexed-name'] for au in authors]) + ': '
  173. try:
  174. if coredata.get('dc:title'):
  175. pub = pub + coredata.get('dc:title').encode('utf-8')
  176. except ValueError:
  177. print "!!! Error encoding title of publication !!!"
  178. #print coredata.get('dc:title')
  179. pub = pub + coredata.get('dc:title')
  180. if coredata.get('prism:publicationName'):
  181. pub = pub + ', ' + coredata.get('prism:publicationName').encode('utf-8')
  182. if coredata.get('prism:volume'):
  183. pub = pub + ', ' + coredata.get('prism:volume').encode('utf-8')
  184. if coredata.get('prism:issueIdentifier'):
  185. pub = pub + ', ' + coredata.get('prism:issueIdentifier').encode('utf-8')
  186. if coredata.get('prism:coverDate'):
  187. pub = pub + ' (' + coredata.get('prism:coverDate').encode('utf-8') + ') '
  188. if coredata.get('prism:pageRange'):
  189. pub = pub + coredata.get('prism:pageRange').encode('utf-8')
  190. elif coredata.get('article-number'):
  191. pub = pub + coredata.get('article-number').encode('utf-8')
  192. if coredata.get('prism:doi'):
  193. pub = pub + ', doi:' + coredata.get('prism:doi').encode('utf-8')
  194. if coredata.get('citedby-count'):
  195. pub = pub + ' (cited ' + coredata.get('citedby-count').encode('utf-8') + ' times)'
  196. pub = pub + '.\n'
  197. return pub