Browse Source

Search with Elasticsearch

Matthias Vogelgesang 7 years ago
parent
commit
885ecee5f2
4 changed files with 27 additions and 7 deletions
  1. 2 0
      nova/__init__.py
  2. 3 0
      nova/templates/user/admin.html
  3. 21 7
      nova/views.py
  4. 1 0
      requirements.txt

+ 2 - 0
nova/__init__.py

@@ -8,6 +8,7 @@ from flask_admin import Admin
 from flask_admin.contrib.sqla import ModelView
 from flask_restful import Api
 from celery import Celery
+from elasticsearch import Elasticsearch
 from nova.fs import Filesystem
 
 __version__ = '0.1.0'
@@ -39,6 +40,7 @@ migrate = Migrate(app, db)
 
 celery = Celery(app.import_name, broker=app.config['CELERY_BROKER_URL'])
 
+es = Elasticsearch()
 
 import nova.models
 

+ 3 - 0
nova/templates/user/admin.html

@@ -65,5 +65,8 @@
       <button type="submit" class="btn btn-primary">Update</button>
     </form>
   </div>
+  <div class="col-lg-12">
+    <a href="{{ url_for("reindex") }}">Re-index search</a>
+  </div>
 </div>
 {% endblock %}

+ 21 - 7
nova/views.py

@@ -2,7 +2,7 @@ import os
 import io
 import re
 from functools import wraps
-from nova import app, db, login_manager, fs, logic, memtar, tasks, models
+from nova import app, db, login_manager, fs, logic, memtar, tasks, models, es
 from nova.models import (User, Collection, Dataset, SampleScan, Genus, Family,
                          Order, Access, Notification, Process)
 from flask import (Response, render_template, request, flash, redirect,
@@ -329,6 +329,19 @@ def open_dataset(dataset_id):
     db.session.commit()
     return redirect(url_for('index'))
 
+@app.route('/reindex')
+@login_required(admin=True)
+def reindex():  # drop the 'datasets' Elasticsearch index and rebuild it from every Dataset row
+    es.indices.delete(index='datasets', ignore=[400, 404])  # 400/404 ignored so a missing index does not raise
+    es.indices.create(index='datasets')
+
+    # FIXME: make this a bulk operation; the loop below issues one es.create() request per dataset
+    for dataset in Dataset.query.all():
+        body = dict(name=dataset.name, description=dataset.description,
+                    tokenized=dataset.name.replace('_', ' '))  # name with '_' turned into spaces; the field /search matches on
+        es.create(index='datasets', doc_type='dataset', body=body)
+
+    return redirect(url_for('index'))  # always returns to the index page once indexing is done
 
 @app.route('/search', methods=['GET', 'POST'])
 @app.route('/search/<int:page>', methods=['GET', 'POST'])
@@ -342,14 +355,15 @@ def search(page=1):
 
     if request.method == 'POST':
         query = request.form['query']
-        datasets = Dataset.query.whoosh_search(query).all()
-        users = User.query.whoosh_search(query).all()
 
-        # FIXME: this is a slow abomination, fix ASAP
-        accesses = [a for a in db.session.query(Access).all()
-                    if a.dataset in datasets or a.user in users]
+        # XXX: also search in description
+        body = {'query': {'match': {'tokenized': {'query': query, 'fuzziness': 'AUTO', 'operator': 'and'}}}}
+        hits = es.search(index='datasets', doc_type='dataset', body=body)
+        names = [h['_source']['name'] for h in hits['hits']['hits']]
+        datasets = Access.query.join(Dataset).filter(Dataset.name.in_(names))
+        pagination = datasets.paginate(page=page, per_page=16)
 
-        return render_template('index/index.html', accesses=accesses)
+        return render_template('index/search.html', pagination=pagination)
 
     samples = Access.query.join(SampleScan)
 

+ 1 - 0
requirements.txt

@@ -12,3 +12,4 @@ passlib
 celery
 requests
 pyxdg
+elasticsearch>=2.0.0,<3.0.0