Browse Source

Use API to implement processing

As of now, we use the user's token to make regular API calls. In the copy
example, we avoid checking out a dataset by using the dataset's real filesystem
paths on the server machine. This is debatable.
Matthias Vogelgesang 7 years ago
parent
commit
772227c08b
7 changed files with 66 additions and 31 deletions
  1. 2 19
      nova/logic.py
  2. 7 2
      nova/models.py
  3. 3 2
      nova/resources.py
  4. 33 0
      nova/tasks.py
  5. 18 0
      nova/utils.py
  6. 2 8
      nova/views.py
  7. 1 0
      requirements.txt

+ 2 - 19
nova/logic.py

@@ -7,9 +7,10 @@ from nova import app, db, models
 from itsdangerous import Signer, BadSignature
 
 
-def create_dataset(name, user, parent=None):
+def create_dataset(name, user, parent_id=None):
     root = app.config['NOVA_ROOT_PATH']
     path = hashlib.sha256(user.name + name + str(datetime.datetime.now())).hexdigest()
+    parent = db.session.query(models.Dataset).filter(models.Dataset.id == parent_id).first()
     dataset = models.Dataset(name=name, path=path, parent=[parent] if parent else [])
     abspath = os.path.join(root, path)
     os.makedirs(abspath)
@@ -33,21 +34,3 @@ def check_token(token):
         abort(401)
 
     return user
-
-
-def copy(dataset, parent):
-    def copytree(src, dst, symlinks=False, ignore=None):
-        for item in os.listdir(src):
-            s = os.path.join(src, item)
-            d = os.path.join(dst, item)
-            if os.path.isdir(s):
-                copytree(s, d, symlinks, ignore)
-            else:
-                if not os.path.exists(d) or os.stat(s).st_mtime - os.stat(d).st_mtime > 1:
-                    shutil.copy2(s, d)
-
-    root = app.config['NOVA_ROOT_PATH']
-    src = os.path.join(root, parent.path)
-    dst = os.path.join(root, dataset.path)
-    app.logger.info("Copy data from {} to {}".format(src, dst))
-    copytree(src, dst)

+ 7 - 2
nova/models.py

@@ -1,6 +1,7 @@
+import os
 import datetime
 import hashlib
-from nova import db
+from nova import app, db
 from sqlalchemy_utils import PasswordType, force_auto_coercion
 from itsdangerous import Signer, BadSignature
 
@@ -34,7 +35,7 @@ class User(db.Model):
         self.password = password
         self.is_admin = is_admin
         self.gravatar = hashlib.md5(email.lower()).hexdigest()
-        self.token = None
+        self.generate_token()
 
     def __repr__(self):
         return '<User(name={}, fullname={}>'.format(self.name, self.fullname)
@@ -82,6 +83,10 @@ class Dataset(db.Model):
 
     parent = db.relationship('Dataset')
 
+    def to_dict(self):
+        path = os.path.join(app.config['NOVA_ROOT_PATH'], self.path)
+        return dict(name=self.name, path=path, closed=self.closed)
+
     def __repr__(self):
         return '<Dataset(name={}, path={}>'.format(self.name, self.path)
 

+ 3 - 2
nova/resources.py

@@ -28,10 +28,11 @@ class Datasets(Resource):
     def post(self):
         parser = reqparse.RequestParser()
         parser.add_argument('name', type=str, help="Dataset name")
+        parser.add_argument('parent', type=int, help="Dataset parent", default=None)
         args = parser.parse_args()
 
         user = logic.get_user(request.args['token'])
-        dataset = logic.create_dataset(args.name, user)
+        dataset = logic.create_dataset(args.name, user, parent_id=args.parent)
         return dict(id=dataset.id)
 
 
@@ -44,7 +45,7 @@ class Dataset(Resource):
                 filter(models.Access.user == user).\
                 filter(models.Dataset.id == dataset_id).\
                 first()
-        return dict(name=dataset.name)
+        return dataset.to_dict()
 
     def put(self, dataset_id):
         user = logic.get_user(request.args['token'])

+ 33 - 0
nova/tasks.py

@@ -0,0 +1,33 @@
+import requests
+from celery import Celery
+from nova import utils
+
+
+
+app = Celery('tasks', broker='amqp://guest@localhost//')
+
+
+@app.task
+def copy(token, name, parent_id):
+    url = 'http://127.0.0.1:5000/api/datasets'
+    params = dict(token=token)
+
+    # fetch path info about parent and new dataset
+    src = requests.get('{}/{}'.format(url, parent_id), params=params).json()
+
+    # TODO: check if parent is not closed yet and error
+
+    # TODO: refactor out with code from nova client
+    data = dict(name=name, parent=parent_id)
+
+    # create new dataset
+    r = requests.post(url, params=params, data=data)
+    result = r.json()
+
+    # check path info of new dataset
+    dest = requests.get('{}/{}'.format(url, result['id']), params=params).json()
+
+    # NOTE: we are doing a fast path here and I am not sure if this is really
+    # the way to go ...
+
+    utils.copy(src['path'], dest['path'])

+ 18 - 0
nova/utils.py

@@ -0,0 +1,18 @@
+import os
+import shutil
+from nova import app
+
+
+def copy(src_path, dst_path):
+    def copytree(src, dst, symlinks=False, ignore=None):
+        for item in os.listdir(src):
+            s = os.path.join(src, item)
+            d = os.path.join(dst, item)
+            if os.path.isdir(s):
+                copytree(s, d, symlinks, ignore)
+            else:
+                if not os.path.exists(d) or os.stat(s).st_mtime - os.stat(d).st_mtime > 1:
+                    shutil.copy2(s, d)
+
+    app.logger.info("Copy data from {} to {}".format(src_path, dst_path))
+    copytree(src_path, dst_path)

+ 2 - 8
nova/views.py

@@ -3,7 +3,7 @@ import io
 import datetime
 import shutil
 from functools import wraps
-from nova import app, db, login_manager, fs, logic, memtar
+from nova import app, db, login_manager, fs, logic, memtar, tasks
 from nova.models import User, Dataset, Access, Deletion
 from flask import (Response, render_template, request, flash, redirect,
                    abort, url_for, jsonify)
@@ -244,18 +244,12 @@ def share(dataset_id, user_id=None):
 @app.route('/process/<int:dataset_id>/<process>', methods=['GET', 'POST'])
 @login_required(admin=False)
 def process(dataset_id, process=None):
-    processors = {
-        'copy': logic.copy
-    }
-
     parent = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
 
     if not process:
         return render_template('dataset/process.html', dataset=parent)
 
-    dataset = logic.create_dataset(request.form['name'], current_user, parent=parent)
-    processors[process](dataset, parent)
-
+    result = tasks.copy.delay(current_user.token, request.form['name'], parent.id)
     return redirect(url_for('index'))
 
 

+ 1 - 0
requirements.txt

@@ -9,3 +9,4 @@ Flask-WTF
 SQLAlchemy-Utils
 Sphinx
 passlib
+celery