test-scopus.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. # Access Scopus database
  2. #
  3. import requests
  4. import json
  5. from my_scopus import MY_API_KEY
  6. from my_scopus import ak, pdv, ufo, ufo_ips
  7. #
  8. # Get the last N publications of a given author list
  9. #
  10. # Arguments: list of scopus author ids (e.g. "35313939900")
  11. # Returns: list of one-element lists of scopus article ids ([['SCOPUS_ID:0037368024'], ...])
  12. # The result can be used in the display functions like get_scopus_brief()
  13. #
  14. def get_scopus_list(author_list, opt='', n=5):
  15. if isinstance(author_list, list):
  16. #print "Length of author list %d" % len(author_list)
  17. query = ' OR '.join(['AU-ID('+au+')' for au in author_list])
  18. else:
  19. query = 'AU-ID('+author_list+')'
  20. if len(opt) > 0:
  21. query = query + " AND " + opt
  22. #print "Query: " + query
  23. url = ("http://api.elsevier.com/content/search/scopus?query="
  24. +query+ "&field=dc:identifier&count=" + str(n))
  25. #print "URL: " + url
  26. resp = requests.get(url,
  27. headers={'Accept':'application/json',
  28. 'X-ELS-APIKey': MY_API_KEY})
  29. #print resp
  30. results = resp.json()
  31. return [[str(r['dc:identifier'])] for r in results['search-results']["entry"]]
  32. def get_scopus_info(SCOPUS_ID):
  33. url = ("http://api.elsevier.com/content/abstract/scopus_id/"
  34. + SCOPUS_ID
  35. + "?field=article-number,title,publicationName,volume,issueIdentifier,"
  36. + "prism:pageRange,coverDate,article-number,doi,citedby-count,prism:aggregationType,url,identifier,description,authors,prism:issn")
  37. #print url
  38. resp = requests.get(url,
  39. headers={'Accept':'application/json',
  40. 'X-ELS-APIKey': MY_API_KEY})
  41. results = json.loads(resp.text.encode('utf-8'))
  42. #print resp
  43. print results
  44. fstring = '{authors}, {title}, {journal}, {volume}, {articlenum}, ({date}). {doi} (cited {cites} times).\n{abstract}\n\n'
  45. return fstring.format(authors=', '.join([au['ce:indexed-name'] for au in results['abstracts-retrieval-response']['authors']['author']]),
  46. title=results['abstracts-retrieval-response']['coredata']['dc:title'].encode('utf-8'),
  47. journal=results['abstracts-retrieval-response']['coredata']['prism:publicationName'].encode('utf-8'),
  48. volume=results['abstracts-retrieval-response']['coredata']['prism:volume'].encode('utf-8'),
  49. articlenum=(results['abstracts-retrieval-response']['coredata'].get('prism:pageRange') or
  50. results['abstracts-retrieval-response']['coredata'].get('article-number')).encode('utf-8'),
  51. date=results['abstracts-retrieval-response']['coredata']['prism:coverDate'].encode('utf-8'),
  52. doi='doi:' + results['abstracts-retrieval-response']['coredata']['prism:doi'].encode('utf-8'),
  53. cites=int(results['abstracts-retrieval-response']['coredata']['citedby-count'].encode('utf-8')),
  54. abstract=results['abstracts-retrieval-response']['coredata']['dc:description'].encode('utf-8'))
  55. #
  56. # Display a list of publications in plain text format
  57. #
  58. # Argument: scopus id of the publication
  59. #
  60. # Todo: Implement other formats (e.g. html, bibtex)
  61. # Format publications as articles, Title, Abstract
  62. #
  63. def get_scopus_brief(SCOPUS_ID, max_authors=1000):
  64. id = SCOPUS_ID
  65. if isinstance(id, list):
  66. id = id[0]
  67. url = ("http://api.elsevier.com/content/abstract/scopus_id/"
  68. + id
  69. + "?field=authors,article-number,title,publicationName,volume,issueIdentifier,"
  70. + "prism:pageRange,coverDate,article-number,doi,citedby-count,prism:aggregationType,url,identifier,description,prism:issn")
  71. #print url
  72. resp = requests.get(url,
  73. headers={'Accept':'application/json',
  74. 'X-ELS-APIKey': MY_API_KEY})
  75. results = json.loads(resp.text.encode('utf-8'))
  76. #print resp
  77. #print results
  78. coredata = results['abstracts-retrieval-response']['coredata']
  79. pub = ''
  80. authors = results['abstracts-retrieval-response']['authors']['author']
  81. #print "Number of authors: %d" %len(authors)
  82. if len(authors) > max_authors:
  83. return ''
  84. if len(authors) > 20:
  85. pub = pub + authors[0]['ce:indexed-name'] + ' et.al.: '
  86. else:
  87. pub = ', '.join([au['ce:indexed-name'] for au in authors]) + ': '
  88. try:
  89. if coredata.get('dc:title'):
  90. pub = pub + coredata.get('dc:title').encode('utf-8')
  91. except ValueError:
  92. print "!!! Error encoding title of publication !!!"
  93. #print coredata.get('dc:title')
  94. pub = pub + coredata.get('dc:title')
  95. if coredata.get('prism:publicationName'):
  96. pub = pub + ', ' + coredata.get('prism:publicationName').encode('utf-8')
  97. if coredata.get('prism:volume'):
  98. pub = pub + ', ' + coredata.get('prism:volume').encode('utf-8')
  99. if coredata.get('prism:issueIdentifier'):
  100. pub = pub + ', ' + coredata.get('prism:issueIdentifier').encode('utf-8')
  101. if coredata.get('prism:coverDate'):
  102. pub = pub + ' (' + coredata.get('prism:coverDate').encode('utf-8') + ') '
  103. if coredata.get('prism:pageRange'):
  104. pub = pub + coredata.get('prism:pageRange').encode('utf-8')
  105. elif coredata.get('article-number'):
  106. pub = pub + coredata.get('article-number').encode('utf-8')
  107. if coredata.get('prism:doi'):
  108. pub = pub + ', doi:' + coredata.get('prism:doi').encode('utf-8')
  109. if coredata.get('citedby-count'):
  110. pub = pub + ' (cited ' + coredata.get('citedby-count').encode('utf-8') + ' times)'
  111. pub = pub + '.\n'
  112. return pub
  113. # What kind of lists are interesting for a group website?
  114. # Organisation:
  115. #
  116. # Latest publications | Contact person
  117. # List of 3-5 with titles first
  118. # First lines of the abstract | Events:
  119. # Number of citations | Upcoming event
  120. # (Blog format)
  121. #
  122. # Lately cited papers | Featured publications
  123. # List of 3-5 papers updated cites|
  124. # in the last N month
  125. # Format: Blog
  126. #
  127. #
  128. # List of the latest N publications (N = 3-5)
  129. # List of the latest publications without big collaboration papers?
  130. # List of collaboration papers
  131. # List of publication of the last N month (N = 6-12)
  132. # List split by sub groups
  133. # List of all publication of one year, that can be selected
  134. # List of featured publications (selected by a list of Scopus-Ids / or Blog with Scopus-Id)
  135. # Problem: If the people are in too many different projects
  136. # How to list only those of a certain topic?
  137. # Is there a subgroup, that is only involved in one project?
  138. # Try: Leave out ak, csa, we, baumbach, etc?
  139. #print "LUMINEU 0\u03bd2\u03b2 project".encode('utf-8')
  140. # get number of publications?
  141. # order by date?
  142. # limit to certain interval
  143. #
  144. # Save publications with their number of citations in JSON file of SQLITE database?!
  145. # List of newly cited publications
  146. #
  147. print get_scopus_info("SCOPUS_ID:84969498463")
  148. #resp = requests.get("http://api.elsevier.com/content/author?author_id="+ak+"&view=metrics",
  149. # headers={'Accept':'application/json',
  150. # 'X-ELS-APIKey': MY_API_KEY})
  151. #print resp
  152. #print json.dumps(resp.json(),
  153. # sort_keys=True,
  154. # indent=4, separators=(',', ': '))
  155. #print get_scopus_info('SCOPUS_ID:0037368024')
  156. #publist = get_scopus_list(ak)
  157. #publist = get_scopus_list([ak,ak2], 'PUBYEAR = 2014', 30)
  158. #publist = get_scopus_list(pdv, 'PUBYEAR = 2015', 30)
  159. #publist = get_scopus_list(ufo_ipe, 'PUBYEAR = 2015', 30)
  160. #publist = get_scopus_list(ufo_ips, 'PUBYEAR = 2015', 30)
  161. # Exclude authors?
  162. #publist = get_scopus_list(pdv, 'NOT AU-ID(7006284555)', 10)
  163. # Exclude authors - black list !!!
  164. #
  165. # Display the result
  166. #
  167. #print "Number of publications: %d" % len(publist)
  168. #for pub in publist:
  169. # print get_scopus_brief(pub,10000)
  170. # Test printing functions
  171. #print publist[2][0]
  172. #print get_scopus_info('SCOPUS_ID:0037368024')
  173. #print get_scopus_info(publist[2][0])
  174. # Merge publications
  175. # Search for all publications? Eliminate duplicates
  176. # Format abstract
  177. #
  178. # There seem to be also preformatted output?
  179. # Is output in BibTeX possible?
  180. #
  181. # Impact-Factor of the journal?
  182. # Queries:
  183. # List all publications
  184. # List the latest publications
  185. # List publications of the group w/o double entries
  186. # List latest pubs
  187. # List publication with highest citation count
  188. # List publication in magazines with highest impact value
  189. # Print collaborator network of institutions
  190. # Print list of collaborations wo large collabs
  191. #