Browse Source

Started to generate documentation of the scripts with Python docstrings and Sphinx

Andreas Kopmann 6 years ago
parent
commit
bfd1f56e7b
11 changed files with 409 additions and 134 deletions
  1. 8 2
      README.md
  2. 31 21
      ak_scopus.py
  3. 10 11
      ak_wordpress.py
  4. 20 0
      doc/Makefile
  5. 23 0
      doc/code.rst
  6. 157 0
      doc/conf.py
  7. 20 0
      doc/index.rst
  8. 24 6
      etc/config_ipekopmann2.py
  9. 20 6
      etc/config_ufo_kit_edu.py
  10. 95 87
      scopus_get_publications.py
  11. 1 1
      update.sh

+ 8 - 2
README.md

@@ -1,5 +1,5 @@
 # README scopus
-*Ak, 24.4.2017*
+*Ak, 23.5.2017*
 
 Get information on publications of work groups from Elsevier's Scopus database for usage in websites. For each publication a post on a Wordpress CMS is created. Citations are mapped to Wordpress comments. The get-publication script is intended to run on a regular basis (e.g. by cron).
 
@@ -9,8 +9,14 @@ Note: All scopus scripts run only with valid access to the Scopus database (e.g.
 ## Version history
 
 Todo:
-- generate python inline documentation
+- Generate a sensible API documentation; 
+	Add a basic user documentation along with the API description
+- Add maintenance scripts that check the consistency of Scopus data and 
+  cache database; update of post categories; warning in case of inconsistencies
+ 
 
+Version 1.3, 23.5.17 (ak):
+- generated python inline documentation
 
 Version 1.2, 24.4.17 (ak):
 - move complete configuration of author lists to config file

+ 31 - 21
ak_scopus.py

@@ -1,21 +1,26 @@
+""" Functions that implement the access to the Scopus database """
+    
+
 import requests
 import json
 
 from config import *
 
 
-
-#
-# Get the last N publications of an given author list
-#
-# Arguments: list of scopus author ids (e.g. "35313939900")
-# Returns: list of scopus ids, electronic id and number of citations
-# DOI would be desirable but is not available in some records
-# e.g. ('SCOPUS_ID:0037368024',)
-# The result can be used in the display functions like get_scopus_brief()
-#
 def get_scopus_list(author_list, opt='', max=0):
+    """ Get the last N publications of a given author list
     
+    Arguments: 
+        list of scopus author ids (e.g. "35313939900")
+    
+    Returns: 
+        list of scopus ids, electronic id and number of citations
+    
+    DOI would be desirable but is not available in some records
+    e.g. ('SCOPUS_ID:0037368024',)
+    The result can be used in the display functions like get_scopus_brief()
+    """
+   
     count = 25 # define the number of requests publications in one call
 
 
@@ -104,6 +109,8 @@ def get_scopus_list(author_list, opt='', max=0):
 
 
 def get_scopus_data(SCOPUS_ID):
+    """ Get complete data for a single publication """
+    
     url = ("http://api.elsevier.com/content/abstract/scopus_id/"
            + SCOPUS_ID
            + "?field=article-number,title,publicationName,volume,issueIdentifier,"
@@ -120,9 +127,7 @@ def get_scopus_data(SCOPUS_ID):
 
 
 def get_scopus_refs(EID):
-    # Todo: implement loop, if there are more than 25 citations !!!
-    #
-
+    """ Get list of all citations of a single publication """
 
     count = 25
     n = 0
@@ -162,6 +167,8 @@ def get_scopus_refs(EID):
 
 
 def get_scopus_info(SCOPUS_ID):
+    """ Get complete information from Scopus for a single publication """
+    
     url = ("http://api.elsevier.com/content/abstract/scopus_id/"
            + SCOPUS_ID
            + "?field=article-number,title,publicationName,volume,issueIdentifier,"
@@ -188,15 +195,18 @@ def get_scopus_info(SCOPUS_ID):
                                  abstract=results['abstracts-retrieval-response']['coredata']['dc:description'].encode('utf-8'))
 
 
-#
-# Display a list of publications in plain text format
-#
-# Argement: scopus id of the publication
-#
-# Todo: Implement other formats (e.g. html, bibtex)
-#   Format publications as articles, Title, Abstract
-#
+
 def get_scopus_brief(SCOPUS_ID, max_authors=1000):
+    """ Display a list of publications in plain text format
+    
+    Argument: 
+        scopus id of the publication
+    
+    Todo: 
+        - Implement other formats (e.g. html, bibtex)
+        - Format publications as articles, Title, Abstract
+    """
+    
     id = SCOPUS_ID
     if isinstance(id, list):
         id = id[0]

+ 10 - 11
ak_wordpress.py

@@ -1,6 +1,8 @@
-# Create posts via wordpress API
-# A. Kopmann 6.2.2017 (ak)
-#
+""" Create posts via wordpress API
+    
+*A. Kopmann 6.2.2017 (ak)*
+"""
+
 
 from datetime import datetime
 import json
@@ -21,7 +23,8 @@ wp = Client(wp_api_url, wp_user, wp_password)
 #
 
 def wordpress_get_post(wpid):
-    
+    """ Query post """
+ 
     try:
         post = wp.call(GetPost(wpid))
         #print post.title
@@ -38,7 +41,8 @@ def wordpress_get_post(wpid):
 # create a post from a scopus query
 #
 def wordpress_post_by_scopus(data, category = []):
-    
+    """ Create a new post based on the Scopus information """
+ 
     coredata = data['abstracts-retrieval-response']['coredata']
     try:
         authors = data['abstracts-retrieval-response']['authors']['author']
@@ -135,6 +139,7 @@ def wordpress_post_by_scopus(data, category = []):
 #
 
 def wordpress_comment_by_scopus(wpid, data):
+    """ Create a new comment based on Scopus data """
 
     #print "Create Wordpress comment for post %d" % wpid
 
@@ -200,11 +205,5 @@ def wordpress_comment_by_scopus(wpid, data):
 
 
 
-# Todo: can this be turned off for scopus???
-# Add to themes function.php:
-#add_filter('comment_flood_filter', '__return_false');
-# wordpress_xmlrpc.exceptions.InvalidCredentialsError: You are posting comments too quickly. Slow down.
-
-
 
 

+ 20 - 0
doc/Makefile

@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = python -msphinx
+SPHINXPROJ    = Scopus
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

+ 23 - 0
doc/code.rst

@@ -0,0 +1,23 @@
+Documentation for the Code
+**************************
+
+
+Scopus #1 -- auto members
+=========================
+
+This is something I want to say that is not in the docstring.
+
+.. automodule:: ak_scopus 
+   :members:
+
+.. automodule:: ak_wordpress
+   :members:
+
+.. automodule:: config
+   :members:
+
+
+.. automodule:: scopus_get_publications
+   :members:
+ 
+

+ 157 - 0
doc/conf.py

@@ -0,0 +1,157 @@
+# -*- coding: utf-8 -*-
+#
+# Scopus documentation build configuration file, created by
+# sphinx-quickstart on Tue May 23 17:35:52 2017.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+sys.path.insert(0, os.path.abspath('..'))
+
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['sphinx.ext.autodoc',
+    'sphinx.ext.ifconfig']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'Scopus'
+copyright = u'2017, Andreas Kopmann'
+author = u'Andreas Kopmann'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = u'1.3'
+# The full version, including alpha/beta/rc tags.
+release = u'1.3'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# These patterns also affect html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'alabaster'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+
+# -- Options for HTMLHelp output ------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'Scopusdoc'
+
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+    # The paper size ('letterpaper' or 'a4paper').
+    #
+    # 'papersize': 'letterpaper',
+
+    # The font size ('10pt', '11pt' or '12pt').
+    #
+    # 'pointsize': '10pt',
+
+    # Additional stuff for the LaTeX preamble.
+    #
+    # 'preamble': '',
+
+    # Latex figure (float) alignment
+    #
+    # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (master_doc, 'Scopus.tex', u'Scopus Documentation',
+     u'Andreas Kopmann', 'manual'),
+]
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'scopus', u'Scopus Documentation',
+     [author], 1)
+]
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (master_doc, 'Scopus', u'Scopus Documentation',
+     author, 'Scopus', 'One line description of project.',
+     'Miscellaneous'),
+]
+
+
+

+ 20 - 0
doc/index.rst

@@ -0,0 +1,20 @@
+.. Scopus documentation master file, created by
+   sphinx-quickstart on Tue May 23 17:35:52 2017.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to Scopus's documentation!
+==================================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   code
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`

+ 24 - 6
etc/config-ipekopmann2.py → etc/config_ipekopmann2.py

@@ -1,6 +1,17 @@
-# Scopus script's configration
-# A Kopmann, 12.4.17
-#
+""" Scopus script's configuration
+    
+*A Kopmann, 12.4.17*
+
+Configuration for the test system installed at ipekopmann2 (my macbook).
+The system is identical to the UFO configuration and used for testing
+of new functions.
+
+Todo:
+    - Handle authors that have been also at other institutions.
+      Add time interval for each author or 
+      check if affiliation of the author is also given
+
+"""
 
 # Local publication database 
 
@@ -13,6 +24,7 @@ db_name = 'scopus'
 # Access to Wordpress installation 
 
 wp_api_url = "http://localhost/~kopmann/ufo2/xmlrpc.php"
+""" Access to the Wordpress installation """
 wp_user = "scopus"
 wp_password = "$scopus$"
 
@@ -20,14 +32,13 @@ wp_password = "$scopus$"
 # Reporting
 
 log_file = "/Users/kopmann/scopus-publications.log"
+""" Logfile name for reporting """
 
 
 # Scopus query definition
-# Todo: 
-# - Define a single object with the defintion of the author groups
-# - Search intervall - change to last 2 years?! 
 
 MY_API_KEY = "14d431d052c2caf5e9c4b1ab7de7463d"
+""" Scopus access key (Andreas Kopmann) """
 
 
 # Scopus author IDs
@@ -96,4 +107,11 @@ sc_workgroups = [
               {'name':"Morphology",'authors':ufo_apps},
               {'name':"Algorithms",'authors':ufo_alg}
 ]
+""" Definition of the workgroups 
+    
+Each workgroup is defined by a list of Scopus ID's and the 
+name of the category to be used in Wordpress. The category 
+for a new workgroup has to be created in Wordpress before
+adding publications 
+"""
 

+ 20 - 6
etc/config-ufo-kit-edu.py → etc/config_ufo_kit_edu.py

@@ -1,7 +1,12 @@
-# Scopus script's configration
-# A Kopmann, 12.4.17
-#
+""" Scopus script's configuration
+    
+    *A Kopmann, 12.4.17*
 
+Configuration for the active setup at ufo.kit.edu
+
+"""
+
+# Local publication database
 
 db_host = 'localhost'
 db_user = 'scopus'
@@ -9,7 +14,10 @@ db_pw = '$scopus$'
 db_name = 'scopus'
 
 
+# Access to Wordpress installation
+
 wp_api_url = "https://ufo.kit.edu/dis/xmlrpc.php"
+""" Access to the Wordpress installation """
 wp_user = "scopus"
 wp_password = "$scopus$"
 
@@ -17,14 +25,13 @@ wp_password = "$scopus$"
 # Reporting
 
 log_file = "/root/scopus/log/scopus-publications-ufo-kit-edu.log"
+""" Logfile name for reporting """
 
 
 # Scopus query definition
-# Todo:
-# - Define a single object with the defintion of the author groups
-# - Search intervall - change to last 2 years?!
 
 MY_API_KEY = "14d431d052c2caf5e9c4b1ab7de7463d"
+""" Scopus access key (Andreas Kopmann) """
 
 
 # Scopus author IDs
@@ -93,6 +100,13 @@ sc_workgroups = [
 {'name':"Morphology",'authors':ufo_apps},
 {'name':"Algorithms",'authors':ufo_alg}
 ]
+""" Definition of the workgroups
+    
+    Each workgroup is defined by a list of Scopus ID's and the
+    name of the category to be used in Wordpress. The category
+    for a new workgroup has to be created in Wordpress before
+    adding publications
+    """
 
 
 

+ 95 - 87
scopus-get-publications.py → scopus_get_publications.py

@@ -1,17 +1,37 @@
-# Get new publications
-# Publication and citations retrieval
-# A. Kopmann, 6.2.17 (ak)
-#
-# Scope:
-# Publications are once added to wordpressas a post or comment.
-# Afterwards scopus will not change or modify anything any more !!!
-# Update is completely in the resonsibility of the ufo users
-#
-
-# Todo:
-# - add mail to author button
-# - save full scopus data in the database
-#
+""" Publication and citations retrieval
+    
+*A. Kopmann, 6.2.17 (ak)*
+
+
+Scope:
+    Publications are added once to Wordpress as a post or comment.
+    Afterwards scopus will not change or modify anything any more.
+    Update is completely in the responsibility of the ufo users.
+
+The operation of the script splits in four phases:
+    - Read all publications for one or more author groups
+      The groups are all defined in the configuration file
+      The publications are stored in a local cache database
+    - For each new publication a post in Wordpress is created.
+      The post is added to the categories according to the matching
+      author groups
+    - For each publication the citations are requested and stored
+      in the local cache database as well
+    - For each new citation a Wordpress comment is created.
+
+
+Todo:
+    - add mail to author button
+    - save full scopus data in the database
+    - Add a script to save the data for all publications in the database!!!
+      There was some problem before?!
+    - Add scripts to check consistency in the database
+      and fix problems if detected
+      E.g. search for wpcommentid == 0
+      Check if wp posts + comments are still available, display
+      deleted entries
+"""
+
 
 # Configuration - Scopus
 
@@ -20,8 +40,6 @@ import requests
 import json
 import os.path
 
-#from my_scopus import MY_API_KEY
-#from my_scopus import ak, csa, pdv, ufo, ufo_pdv, ufo_ips, ufo_eps, ufo_apps
 from ak_scopus import get_scopus_list, get_scopus_data, get_scopus_refs
 
 
@@ -43,8 +61,8 @@ nnewcites = 0
 
 
 
-# Read publications of a list of authors and store in the database
 def update_publications(authids,authname='',scopus_opts = '',max=0):
+    """ Read publications of a list of authors and store in the database """
 
 
     print "=== Update of publications for the author group: " + authname
@@ -107,8 +125,10 @@ def update_publications(authids,authname='',scopus_opts = '',max=0):
         connection.close()
 
 
-# Read all citations and store in the citation table
 def update_citations():
+    """ Read all citations and store in the citation table """
+
+
     global npubs
     
     print ""
@@ -184,8 +204,10 @@ def update_citations():
 
 
 
-# Create wordpress posts for all entries that have none
 def update_wp_posts():
+    """ Create wordpress posts for all entries that have none """
+
+
     global nnewpubs
 
     print ""
@@ -253,6 +275,8 @@ def update_wp_posts():
 
 
 def update_wp_comments():
+    """ Create a new comment for newly found citations """
+    
     global ncites
     global nnewcites
     
@@ -311,92 +335,76 @@ def update_wp_comments():
         connection.close()
 
 
-# Todo: Add a script to save the data for all publications in the database!!!
-# There was some problem before?!
-#
-
-# Todo: Add scripts to check consistence in the database
-# and fix problems if detected
-# E.g. search for wpcommentid == 0
-# Check if, wp posts + comments are still availabe, display
-# deleted entries
-#
-
-
-
 # Main
 
-start = datetime.datetime.now()
+# Prevent sphinx from execution
+if __name__ == "__main__": 
 
-print ""
-print "***********************************************"
-print "**** scopus-get-publications / " + start.strftime("%Y-%m-%d") + " *****"
-print "***********************************************"
-print ""
 
+    start = datetime.datetime.now()
 
+    print ""
+    print "***********************************************"
+    print "**** scopus-get-publications / " + start.strftime("%Y-%m-%d") + " *****"
+    print "***********************************************"
+    print ""
 
-# Update publaction database; search for new publications
-# Loop over all user groups defined in ak_scopus.py
 
-# Todo: Detect, if there is no access to scopus !!!
-#
 
-search_param = '(PUBYEAR AFT %d)' % (sc_start)
+    # Update publication database; search for new publications
+    # Loop over all user groups defined in ak_scopus.py
 
-for wp in sc_workgroups:
-    update_publications(wp['authors'],wp['name'],search_param)
+    # Todo: Detect, if there is no access to scopus !!!
+    #
 
-update_wp_posts()
+    search_param = '(PUBYEAR AFT %d)' % (sc_start)
 
-# read all citations
-# Todo: read only new citations?!
+    for wp in sc_workgroups:
+        update_publications(wp['authors'],wp['name'],search_param)
 
-update_citations()
+    update_wp_posts()
 
+    # read all citations
+    # Todo: read only new citations?!
 
-# loop over all cites and post comments to wordpress, when necessary
-# update database
+    update_citations()
 
-update_wp_comments()
-# Todo: deactivate comments for scopus posts!!!
 
+    # loop over all cites and post comments to wordpress, when necessary
+    # update database
 
-# Display summary
-end = datetime.datetime.now()
-print ""
-print "Summary: (see also logfile %s) " % log_file
-print "Date       = " + str(start)
-print "NPubs      = " + str(npubs)
-print "NNewPubs   = " + str(nnewpubs)
-print "NCites     = " + str(ncites)
-print "NNewCites  = " + str(nnewcites)
-print "Runtime    = " + str(end - start)
+    update_wp_comments()
+    # Todo: deactivate comments for scopus posts!!!
 
 
-# Write summary to log file
-if not os.path.isfile(log_file):
-    print "Create logfile " + log_file
-    # Open file and write header
-    log = open(log_file,"w")
-    log.write(__file__ + "\n")
-    log.write("\n")
-    log.write("      Date\t    Time\tNPubs\tNNewP\tNCite\tNNewC\t          TRun\n")
-    log.write("------------------------------------------------------------------------------\n")
+    # Display summary
+    end = datetime.datetime.now()
+    print ""
+    print "Summary: (see also logfile %s) " % log_file
+    print "Date       = " + str(start)
+    print "NPubs      = " + str(npubs)
+    print "NNewPubs   = " + str(nnewpubs)
+    print "NCites     = " + str(ncites)
+    print "NNewCites  = " + str(nnewcites)
+    print "Runtime    = " + str(end - start)
+
+
+    # Write summary to log file
+    if not os.path.isfile(log_file):
+        print "Create logfile " + log_file
+        # Open file and write header
+        log = open(log_file,"w")
+        log.write(__file__ + "\n")
+        log.write("\n")
+        log.write("      Date\t    Time\tNPubs\tNNewP\tNCite\tNNewC\t          TRun\n")
+        log.write("------------------------------------------------------------------------------\n")
+        log.close()
+
+
+    log = open(log_file,"a")
+    log.write("%s\t%s\t%5d\t%5d\t%5d\t%5d\t%s\n" % (start.strftime("%Y-%m-%d"),
+                                        start.strftime("%H:%M:%S"),
+                                        npubs,nnewpubs,ncites,nnewcites,str(end-start)))
     log.close()
 
-
-log = open(log_file,"a")
-log.write("%s\t%s\t%5d\t%5d\t%5d\t%5d\t%s\n" % (start.strftime("%Y-%m-%d"),
-                                    start.strftime("%H:%M:%S"),
-                                    npubs,nnewpubs,ncites,nnewcites,str(end-start)))
-log.close()
-
-
-
-# done
-
-
-
-
-
+    # done

+ 1 - 1
update.sh

@@ -2,5 +2,5 @@
 # A Kopmann, 11.4.2017
 #
 
-python -W ignore /root/scopus/scopus-get-publications.py
+python -W ignore /root/scopus/scopus_get_publications.py