From c1d9a52d079a9bb2da7e04642154f5b2afccc265 Mon Sep 17 00:00:00 2001 From: mathieu Date: Wed, 18 May 2016 11:59:14 +0200 Subject: [PATCH] Convert to english, lowercase and explain backend --- telemeta/forms/haystack_form.py | 30 ++++---- telemeta/templates/search/advanceSearch.html | 76 +++++++++---------- telemeta/templates/telemeta/lists.html | 1 - .../templates/telemeta/mediaitem_list.html | 44 ++++++----- telemeta/util/backend.py | 20 ++++- telemeta/views/search.py | 4 +- 6 files changed, 95 insertions(+), 80 deletions(-) diff --git a/telemeta/forms/haystack_form.py b/telemeta/forms/haystack_form.py index fdc42c9d..46ef6a5a 100644 --- a/telemeta/forms/haystack_form.py +++ b/telemeta/forms/haystack_form.py @@ -155,24 +155,24 @@ class HayAdvanceForm(SearchForm): physical_format = forms.CharField(required=False, label=(_('physical format')), widget=forms.Select(attrs={'style': 'width:100%'}, choices=list_physical_format())) code = forms.CharField(required=False, label=(_('code')), widget=forms.TextInput(attrs={'class': 'form-control', 'type': 'search'})) - def filterInstru(self, query): + def filter_instru(self, query): from telemeta.views.search import BooleanSearchView, Erreur - - try: - BooleanSearchView().isCorrectQuery(query) - except Erreur: - return SQ(instruments__startswith=query) + if isinstance(query, str): + try: + BooleanSearchView().is_correct_query(query) + except Erreur: + return SQ(instruments__startswith=query) operateur = "ET" if isinstance(query, list): - queryTerms = query + query_terms = query else: - queryTerms = query.split() + query_terms = query.split() sqTab = [] valeur = "" - while len(queryTerms) != 0: - term = queryTerms.pop(0) + while len(query_terms) != 0: + term = query_terms.pop(0) if term == "ET" or term == "OU": if valeur != "": sqTab.append(('instruments__startswith', valeur.strip())) @@ -184,9 +184,9 @@ class HayAdvanceForm(SearchForm): sqTab.append(objet) operateur = "OU" if operateur == "ET" else "ET" elif term == "(": - 
indexCloseBracket = getclosebracket(queryTerms) - sqTab.append(self.filterInstru(queryTerms[:indexCloseBracket])) - del queryTerms[:indexCloseBracket + 1] + indexCloseBracket = get_close_bracket(query_terms) + sqTab.append(self.filter_instru(query_terms[:indexCloseBracket])) + del query_terms[:indexCloseBracket + 1] else: valeur += term + " " if valeur != "": @@ -214,7 +214,7 @@ class HayAdvanceForm(SearchForm): sqs = sqs.filter(ethnic_group__contains=self.cleaned_data['ethnic_group']) if self.cleaned_data.get('instruments'): - sqs = sqs.filter(self.filterInstru(self.cleaned_data['instruments'])) + sqs = sqs.filter(self.filter_instru(self.cleaned_data['instruments'])) if self.cleaned_data.get('collectors'): sqs = sqs.filter(collectors__startswith=self.cleaned_data['collectors']) @@ -258,7 +258,7 @@ class HayAdvanceForm(SearchForm): return sqs -def getclosebracket(tab): +def get_close_bracket(tab): index = 0 par = 1 while par != 0 and index{# Show some example queries to run, maybe query syntax, something else? #} {% endif %} -
+
@@ -256,7 +256,7 @@ {% endif %} {% endfor %} - {% trans "Add field" %} + {% trans "Add field" %}
@@ -285,22 +285,22 @@ {% endblock %} \ No newline at end of file diff --git a/telemeta/util/backend.py b/telemeta/util/backend.py index d969c1a2..f20bcd13 100644 --- a/telemeta/util/backend.py +++ b/telemeta/util/backend.py @@ -4,16 +4,25 @@ from haystack.backends.elasticsearch_backend import * import string import re -regexSpacePunc = "[\\s"+re.escape(string.punctuation)+"]+" +#Regex used to remove punctuation when indexing fields and +#when submitting a search query +regex_space_punc = "[\\s" + re.escape(string.punctuation) + "]+" class CustomElasticBackend(ElasticsearchSearchBackend): + #This setup modifies the mapping of ES in order to have better results + #in the search engine. It adds 2 analyzers (for indexing and searching): + # -whitespace_asciifolding_analyzer : removes punctuation, converts + # all terms to lowercase and folds accents to ASCII. + # -lowercase_analyzer : converts the word to lowercase (used by the code field + # in order to preserve the underscores in codes) + def setup(self): DEFAULT_FIELD_MAPPING['analyzer']='whitespace_asciifolding_analyzer' FIELD_MAPPINGS['keyword'] = {'type': 'string', 'analyzer':'lowercase_analyzer'} eb = super(CustomElasticBackend, self) eb.DEFAULT_SETTINGS.get('settings').get('analysis').get('tokenizer')['esc_scape_tokenizer']=\ - {"type": "pattern", "pattern": regexSpacePunc} + {"type": "pattern", "pattern": regex_space_punc} eb.DEFAULT_SETTINGS.get('settings').get('analysis').get('analyzer')['whitespace_asciifolding_analyzer']=\ {"type": "custom", "tokenizer": "esc_scape_tokenizer", "filter": ["lowercase", "asciifolding"]} eb.DEFAULT_SETTINGS.get('settings').get('analysis').get('analyzer')['lowercase_analyzer'] = \ @@ -22,14 +31,19 @@ class CustomElasticBackend(ElasticsearchSearchBackend): class CustomElasticSearchQuery(ElasticsearchSearchQuery): + #Custom search query that removes all punctuation characters and + #converts boolean values to strings. + #Used when a query is entered + def build_query_fragment(self, field, filter_type, value): 
if isinstance(value, bool): value = str(value) if not isinstance(value, int) and field !='code': - value = re.sub(regexSpacePunc, " ", value) + value = re.sub(regex_space_punc, " ", value) valeur = super(CustomElasticSearchQuery, self).build_query_fragment(field, filter_type, value) return valeur +#The custom engine that determines the backend and search query classes class CustomElasticEngine(ElasticsearchSearchEngine): backend = CustomElasticBackend query = CustomElasticSearchQuery diff --git a/telemeta/views/search.py b/telemeta/views/search.py index a736cb0e..b1e553eb 100644 --- a/telemeta/views/search.py +++ b/telemeta/views/search.py @@ -230,14 +230,14 @@ class BooleanSearchView(object): query += formul.cleaned_data["textField"].strip() + " " query += formul.cleaned_data["endBracket"] try: - self.isCorrectQuery(query.strip()) + self.is_correct_query(query.strip()) except Erreur as e: return HttpResponse(json.dumps({'result': e.message}), content_type='application/json') return HttpResponse(json.dumps({'result': query.strip()}), content_type='application/json') else: return HttpResponse(json.dumps({'result': '[ERROR]Field(s) missing'}), content_type='application/json') - def isCorrectQuery(self, query): + def is_correct_query(self, query): tabQuery = query.split() openBracket = 0 boolean = False -- 2.39.5