From 48d0b8cc4cf792937bda81c0c3768acb6b26b87d Mon Sep 17 00:00:00 2001 From: Thomas Fillon Date: Tue, 19 Jul 2016 17:52:54 +0200 Subject: [PATCH] Add more fields to Notice document --- .gitignore | 93 +++++++++++++++++ mcm/admin.py | 14 ++- mcm/management/commands/import_xml.py | 63 +++++++----- mcm/migrations/0001_initial.py | 137 ++++++++++++++++++++++++++ mcm/migrations/__init__.py | 0 mcm/models.py | 24 +++-- notes.org | 39 ++++++++ 7 files changed, 336 insertions(+), 34 deletions(-) create mode 100644 .gitignore create mode 100644 mcm/migrations/0001_initial.py create mode 100644 mcm/migrations/__init__.py create mode 100644 notes.org diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..376833a --- /dev/null +++ b/.gitignore @@ -0,0 +1,93 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# emacs temporary file +*~ +\#*\# + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject diff --git a/mcm/admin.py b/mcm/admin.py index c5a268f..ff80824 100644 --- a/mcm/admin.py +++ b/mcm/admin.py @@ -2,7 +2,8 @@ from django.contrib import admin # Register your models here. from .models import Author, Keyword, Reference -from .models import Event, EventType, EventVenue +from .models import Event, EventEdition, EventType, EventVenue +from .models import GeographicalClassification from .models import Document from .models import Notice @@ -36,6 +37,11 @@ class EventVenueAdmin(admin.ModelAdmin): list_filter = ('name',) search_fields = ['name'] ordering = ['name'] + +class GeoAdmin(admin.ModelAdmin): + list_filter = ('name',) + search_fields = ['name'] + ordering = ['name'] class NoticeAdmin(admin.ModelAdmin): list_display = ('code', 'title') @@ -43,6 +49,9 @@ class NoticeAdmin(admin.ModelAdmin): search_fields = ['title', 'code'] filter_horizontal = ('authors', 'keywords',) +class EventEditionAdmin(admin.ModelAdmin): + list_display = ('event', 'edition') + ordering = ['event', 'edition'] admin.site.register(Author, AuthorAdmin) @@ -50,9 +59,10 @@ admin.site.register(Keyword, KeywordAdmin) admin.site.register(Reference, ReferenceAdmin) admin.site.register(Event, EventAdmin) +admin.site.register(EventEdition, EventEditionAdmin) admin.site.register(EventType, EventTypeAdmin) 
admin.site.register(EventVenue, EventVenueAdmin) - +admin.site.register(GeographicalClassification, GeoAdmin) admin.site.register(Document) admin.site.register(Notice, NoticeAdmin) diff --git a/mcm/management/commands/import_xml.py b/mcm/management/commands/import_xml.py index 8454b15..b508361 100644 --- a/mcm/management/commands/import_xml.py +++ b/mcm/management/commands/import_xml.py @@ -5,12 +5,15 @@ from django.core.management.base import BaseCommand, CommandError from ...models import Document from ...models import Notice from ...models import Author, Keyword, Reference -from ...models import Event, EventType, EventVenue +from ...models import Event, EventEdition, EventType, EventVenue +from ...models import GeographicalClassification #import lxml.etree.ElementTree as ET import xml.etree.ElementTree as ET import os +DEBUG = False + replacements = {'auteur_affiche_(dessin)>': 'auteur_affiche_dessin>', '[record_no]>': 'record_no>', '<': '???<', @@ -73,39 +76,43 @@ class Command(BaseCommand): #print doc_type if doc_type == 'a-Notice spectacle': document_traite +=1 - record_no = document.find('record_no').text - code = document.find('Cote').text + record_no = document.findtext('record_no') + code = document.findtext('Cote') + event_type = document.findtext('Type_Manifestation') + if event_type is not None: + event_type_obj, c = EventType.objects.get_or_create(name=event_type) + else: + event_type_obj = None - event_type = document.find('Type_Manifestation').text - event_type_obj, c = EventType.objects.get_or_create(name=event_type) - - try: - event_venue = document.find('Lieu_Manifestation').text + event_venue = document.findtext('Lieu_Manifestation') + if event_venue is not None: event_venue_obj, c = EventVenue.objects.get_or_create(name=event_venue) - except AttributeError: - if document.find('Lieu_Manifestation') is None: - event_venue_obj = None - try: - event = document.find('Festival_et_Manifestation').text + else: + event_venue_obj = None + + event = 
document.findtext('Festival_et_Manifestation') + if event is not None: event_obj, c = Event.objects.get_or_create(name=event) - except AttributeError: - if document.find('Festival_et_Manifestation') is None: - event_obj = None + + edition = document.findtext('No_edition') + try: + event_edition_obj, c = EventEdition.objects.get_or_create(event=event_obj, edition=edition) + except ValueError: + print code, edition + event_edition_obj, c = EventEdition.objects.get_or_create(event=event_obj, edition=None) + else: + event_edition_obj = None notice, c = Notice.objects.get_or_create(old_id=record_no, code=code, - event=event_obj, + event_edition=event_edition_obj, event_type=event_type_obj, event_venue=event_venue_obj) - notice.title = document.find('Titre').text - try: - notice.text = document.find('Texte').text - except: - pass + notice.title = document.findtext('Titre') + - import datetime try: release_date = datetime.datetime.strptime(document.find('Date_de_parution').text,'%d/%m/%y').date() @@ -139,12 +146,16 @@ class Command(BaseCommand): for ref in document.findall('Reference'): ref_obj, ref_c = Reference.objects.get_or_create(name=ref.text) notice.references.add(ref_obj) - - + # GeographicalClassification + geo = document.findtext('Classement_Geographique') + if geo is not None: + geo_obj,c = GeographicalClassification.objects.get_or_create(name=geo) + notice.geographic_classification = geo_obj notice.save() else: document_non_traite += 1 - + if DEBUG & (document_traite > 100): + break print '-*-*--*-*-*-*-*-*-*-*' print 'document_traité : %d' % document_traite print 'document_non_traité : %d' % document_non_traite diff --git a/mcm/migrations/0001_initial.py b/mcm/migrations/0001_initial.py new file mode 100644 index 0000000..bc0ffab --- /dev/null +++ b/mcm/migrations/0001_initial.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.9.2 on 2016-07-19 15:51 +from __future__ import unicode_literals + +from django.db import migrations, models 
+import django.db.models.deletion + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='Author', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=100, unique=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='Document', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('code', models.CharField(blank=True, max_length=200, verbose_name='code')), + ('title', models.CharField(max_length=200, verbose_name='title')), + ('old_id', models.IntegerField(unique=True)), + ('release_date', models.DateField(blank=True, null=True, verbose_name='release date')), + ('text', models.TextField(default='')), + ('indexation_date', models.DateField(blank=True, null=True, verbose_name='indexation date')), + ], + ), + migrations.CreateModel( + name='Event', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=100, unique=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='EventEdition', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('edition', models.IntegerField(blank=True, default=None, null=True)), + ('event', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='mcm.Event')), + ], + ), + migrations.CreateModel( + name='EventType', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=100, unique=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='EventVenue', + fields=[ + ('id', models.AutoField(auto_created=True, 
primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=100, unique=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='GeographicalClassification', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=100, unique=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='Keyword', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=100, unique=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='Reference', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=100, unique=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='Notice', + fields=[ + ('document_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='mcm.Document')), + ('event_edition', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='mcm.EventEdition')), + ('event_type', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='mcm.EventType')), + ('event_venue', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='mcm.EventVenue')), + ], + bases=('mcm.document',), + ), + migrations.AddField( + model_name='document', + name='authors', + field=models.ManyToManyField(to='mcm.Author'), + ), + migrations.AddField( + model_name='document', + name='geographic_classification', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='mcm.GeographicalClassification'), + ), + migrations.AddField( + 
model_name='document', + name='keywords', + field=models.ManyToManyField(to='mcm.Keyword'), + ), + migrations.AddField( + model_name='document', + name='references', + field=models.ManyToManyField(to='mcm.Reference'), + ), + ] diff --git a/mcm/migrations/__init__.py b/mcm/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mcm/models.py b/mcm/models.py index d889249..35f97bf 100644 --- a/mcm/models.py +++ b/mcm/models.py @@ -35,7 +35,19 @@ class EventVenue(BaseMany): class Event(BaseMany): pass - +class GeographicalClassification(BaseMany): + pass + +class EventEdition(models.Model): + event = models.ForeignKey(Event, blank=True, null=True)#Japon 93 + edition = models.IntegerField(default=None, blank=True, null=True) # 16 + + def __unicode__(self): + if self.edition is None: + return self.event.name + else: + return ' - '.join([self.event.name, self.edition.__str__()]) + DOCUMENT_TYPES = ( ('a', 'Notice spectacle'), @@ -65,10 +77,7 @@ class Document(models.Model): #Musique d'Indonésie keywords = models.ManyToManyField(Keyword) #Toraja text = models.TextField(default='') - #Indonésie - event_type = models.ForeignKey(EventType)#Danse - event_venue = models.ForeignKey(EventVenue, blank=True, null=True)#Le Rond Point, Théâtre Renaud-Barrault, Paris - event = models.ForeignKey(Event, blank=True, null=True)#Japon 93 + geographic_classification = models.ForeignKey(GeographicalClassification, null=True, blank=True)# Pérou #Compact Disc Digital Audio #69'14 #Le chant du monde @@ -82,4 +91,7 @@ class Document(models.Model): return self.title class Notice(Document): - pass + event_type = models.ForeignKey(EventType, blank=True, null=True)#Danse + event_venue = models.ForeignKey(EventVenue, blank=True, null=True)#Le Rond Point, Théâtre Renaud-Barrault, Paris + event_edition = models.ForeignKey(EventEdition, blank=True, null=True) + diff --git a/notes.org b/notes.org new file mode 100644 index 0000000..751a212 --- /dev/null +++ b/notes.org @@ -0,0 
+1,39 @@ +indexation_date not null + +* Problèmes à traiter +** Date de Parution : plusieurs formats +Nombre d'erreur_date_parution : 886 / +en général : +- année : 1996, 2007 +- ou date 10/12/1997 + +mais aussi : +- inconnue (-> blank=True en base de données) +- 2002-2003 (embêtant car ni une date ni une année ...) +- 2015/09/08 (un seul cas) + Séminaire2015 + Patrimoine culturel immatériel et numérique : Transmission, participation, enjeux + 5570 + a-Notice spectacle + <[record_no]>2280303 +- 1971/1989 ???? période ? ou deux dates ? +- 09/2010 , mois + année + +Solutions : +- on laisse un champ string ... (plus possible de trier par date ou par année) +- on met deux champs : year et date et on renseigne au moins l'un des deux (certains problèmes non résolus) + +** Auteurs +On perd l'information du rôle: +par exemple : + Titre : Cap-Vert. Escales musicales à Cabo Verde. Un week-end festif avec des musiciens et des chanteurs venus de diverses îles de l'archipel. + + Auteurs : Interprète Oliveira, Maria da Luz ; Interprète Manazinha, Nha ; Interprète Mascarenhas, Aldina ; Compositeur Montrond, Joao ; Interprète Luis, Zé ; Interprète Grupo Nos Erança ; Direction musicale Moura, Ana Paula ; Interprète Association Sementera ; Direction musicale Gonçalves, Alfredo + ... (Producteur ) + Cote : CV.04-4469 +** Manifestations: +On peut avoir des notices de spectacles pour des spectacles sans Manifestations et/ou sans Lieu de manifestation (blank=True, null=True en base de données) + +** Manifestations: +- Edition : nombre entier sauf pour la Cote BR.7-91 : Art populaire du Brésil. De la campagne à la ville, un siècle d'imaginaire brésilien. 4 avril - 18 mai. (Printemps brésilien à Paris. Mars-avril 1987). +- 8-9 -- 2.39.5