--- /dev/null
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Emacs backup and auto-save files
+*~
+\#*\#
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
# Register your models here.
from .models import Author, Keyword, Reference
-from .models import Event, EventType, EventVenue
+from .models import Event, EventEdition, EventType, EventVenue
+from .models import GeographicalClassification
from .models import Document
from .models import Notice
list_filter = ('name',)
search_fields = ['name']
ordering = ['name']
+
+class GeoAdmin(admin.ModelAdmin):
+    """Admin options that filter, search and order entries by their name."""
+    ordering = ['name']
+    search_fields = ['name']
+    list_filter = ('name',)
class NoticeAdmin(admin.ModelAdmin):
list_display = ('code', 'title')
search_fields = ['title', 'code']
filter_horizontal = ('authors', 'keywords',)
+class EventEditionAdmin(admin.ModelAdmin):
+    """Admin options listing the event and edition columns, ordered by both."""
+    ordering = ['event', 'edition']
+    list_display = ('event', 'edition')
admin.site.register(Author, AuthorAdmin)
admin.site.register(Reference, ReferenceAdmin)
admin.site.register(Event, EventAdmin)
+admin.site.register(EventEdition, EventEditionAdmin)
admin.site.register(EventType, EventTypeAdmin)
admin.site.register(EventVenue, EventVenueAdmin)
-
+admin.site.register(GeographicalClassification, GeoAdmin)
admin.site.register(Document)
admin.site.register(Notice, NoticeAdmin)
from ...models import Document
from ...models import Notice
from ...models import Author, Keyword, Reference
-from ...models import Event, EventType, EventVenue
+from ...models import Event, EventEdition, EventType, EventVenue
+from ...models import GeographicalClassification
#import lxml.etree.ElementTree as ET
import xml.etree.ElementTree as ET
import os
+# Debug switch: when true, the import loop breaks once more than 100
+# notices have been processed.
+DEBUG = False
+
replacements = {'auteur_affiche_(dessin)>': 'auteur_affiche_dessin>',
'[record_no]>': 'record_no>',
'<': '???<',
#print doc_type
if doc_type == 'a-Notice spectacle':
document_traite +=1
- record_no = document.find('record_no').text
- code = document.find('Cote').text
+ record_no = document.findtext('record_no')
+ code = document.findtext('Cote')
+ event_type = document.findtext('Type_Manifestation')
+ if event_type is not None:
+ event_type_obj, c = EventType.objects.get_or_create(name=event_type)
+ else:
+ event_type_obj = None
- event_type = document.find('Type_Manifestation').text
- event_type_obj, c = EventType.objects.get_or_create(name=event_type)
-
- try:
- event_venue = document.find('Lieu_Manifestation').text
+ event_venue = document.findtext('Lieu_Manifestation')
+ if event_venue is not None:
event_venue_obj, c = EventVenue.objects.get_or_create(name=event_venue)
- except AttributeError:
- if document.find('Lieu_Manifestation') is None:
- event_venue_obj = None
- try:
- event = document.find('Festival_et_Manifestation').text
+ else:
+ event_venue_obj = None
+
+ event = document.findtext('Festival_et_Manifestation')
+ if event is not None:
event_obj, c = Event.objects.get_or_create(name=event)
- except AttributeError:
- if document.find('Festival_et_Manifestation') is None:
- event_obj = None
+
+ edition = document.findtext('No_edition')
+ try:
+ event_edition_obj, c = EventEdition.objects.get_or_create(event=event_obj, edition=edition)
+ except ValueError:
+ print code, edition
+ event_edition_obj, c = EventEdition.objects.get_or_create(event=event_obj, edition=None)
+ else:
+ event_edition_obj = None
notice, c = Notice.objects.get_or_create(old_id=record_no,
code=code,
- event=event_obj,
+ event_edition=event_edition_obj,
event_type=event_type_obj,
event_venue=event_venue_obj)
- notice.title = document.find('Titre').text
- try:
- notice.text = document.find('Texte').text
- except:
- pass
+ notice.title = document.findtext('Titre')
+
-
import datetime
try:
release_date = datetime.datetime.strptime(document.find('Date_de_parution').text,'%d/%m/%y').date()
for ref in document.findall('Reference'):
ref_obj, ref_c = Reference.objects.get_or_create(name=ref.text)
notice.references.add(ref_obj)
-
-
+ # GeographicalClassification
+ geo = document.findtext('Classement_Geographique')
+ if geo is not None:
+ geo_obj,c = GeographicalClassification.objects.get_or_create(name=geo)
+ notice.geographic_classification = geo_obj
notice.save()
else:
document_non_traite += 1
-
+ # Debug aid: stop once more than 100 notices have been processed.
+ # Uses the short-circuiting boolean `and` instead of the bitwise `&`,
+ # which would misbehave if DEBUG were ever set to a non-bool truthy value.
+ if DEBUG and document_traite > 100:
+     break
print '-*-*--*-*-*-*-*-*-*-*'
print 'document_traité : %d' % document_traite
print 'document_non_traité : %d' % document_non_traite
--- /dev/null
+# -*- coding: utf-8 -*-
+# Generated by Django 1.9.2 on 2016-07-19 15:51
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+ # Initial schema migration for the 'mcm' app: creates every model table,
+ # then adds the relations carried by Document.
+
+ # First migration of the app, hence no dependencies.
+ initial = True
+
+ dependencies = [
+ ]
+
+ operations = [
+ # Author, Event, EventType, EventVenue, GeographicalClassification,
+ # Keyword and Reference all have the same shape: an auto id plus a
+ # unique name of at most 100 characters.
+ migrations.CreateModel(
+ name='Author',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('name', models.CharField(max_length=100, unique=True)),
+ ],
+ options={
+ 'abstract': False,
+ },
+ ),
+ # Base document record; old_id must be unique, both dates are optional.
+ migrations.CreateModel(
+ name='Document',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('code', models.CharField(blank=True, max_length=200, verbose_name='code')),
+ ('title', models.CharField(max_length=200, verbose_name='title')),
+ ('old_id', models.IntegerField(unique=True)),
+ ('release_date', models.DateField(blank=True, null=True, verbose_name='release date')),
+ ('text', models.TextField(default='')),
+ ('indexation_date', models.DateField(blank=True, null=True, verbose_name='indexation date')),
+ ],
+ ),
+ migrations.CreateModel(
+ name='Event',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('name', models.CharField(max_length=100, unique=True)),
+ ],
+ options={
+ 'abstract': False,
+ },
+ ),
+ # An optional integer edition number attached to an optional Event.
+ migrations.CreateModel(
+ name='EventEdition',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('edition', models.IntegerField(blank=True, default=None, null=True)),
+ ('event', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='mcm.Event')),
+ ],
+ ),
+ migrations.CreateModel(
+ name='EventType',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('name', models.CharField(max_length=100, unique=True)),
+ ],
+ options={
+ 'abstract': False,
+ },
+ ),
+ migrations.CreateModel(
+ name='EventVenue',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('name', models.CharField(max_length=100, unique=True)),
+ ],
+ options={
+ 'abstract': False,
+ },
+ ),
+ migrations.CreateModel(
+ name='GeographicalClassification',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('name', models.CharField(max_length=100, unique=True)),
+ ],
+ options={
+ 'abstract': False,
+ },
+ ),
+ migrations.CreateModel(
+ name='Keyword',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('name', models.CharField(max_length=100, unique=True)),
+ ],
+ options={
+ 'abstract': False,
+ },
+ ),
+ migrations.CreateModel(
+ name='Reference',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('name', models.CharField(max_length=100, unique=True)),
+ ],
+ options={
+ 'abstract': False,
+ },
+ ),
+ # Notice is a child table of Document: document_ptr is both the parent
+ # link and the primary key. All three event relations are optional.
+ migrations.CreateModel(
+ name='Notice',
+ fields=[
+ ('document_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='mcm.Document')),
+ ('event_edition', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='mcm.EventEdition')),
+ ('event_type', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='mcm.EventType')),
+ ('event_venue', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='mcm.EventVenue')),
+ ],
+ bases=('mcm.document',),
+ ),
+ # Document's relations are added last, after all target models exist.
+ migrations.AddField(
+ model_name='document',
+ name='authors',
+ field=models.ManyToManyField(to='mcm.Author'),
+ ),
+ migrations.AddField(
+ model_name='document',
+ name='geographic_classification',
+ field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='mcm.GeographicalClassification'),
+ ),
+ migrations.AddField(
+ model_name='document',
+ name='keywords',
+ field=models.ManyToManyField(to='mcm.Keyword'),
+ ),
+ migrations.AddField(
+ model_name='document',
+ name='references',
+ field=models.ManyToManyField(to='mcm.Reference'),
+ ),
+ ]
class Event(BaseMany):
pass
-
+class GeographicalClassification(BaseMany):
+    """Geographical classification entry; adds nothing to BaseMany."""
+
+class EventEdition(models.Model):
+    """Pairs an Event with an optional integer edition number.
+
+    Both columns are nullable, so either part may be missing.
+    """
+    # Source XML example: <Festival_et_Manifestation>Japon 93</Festival_et_Manifestation>
+    event = models.ForeignKey(Event, blank=True, null=True)
+    # Source XML example: <No_edition>16</No_edition>
+    edition = models.IntegerField(default=None, blank=True, null=True)
+
+    def __unicode__(self):
+        """Return "<event name> - <edition>", leaving out any missing part.
+
+        Returns an empty string when neither the event nor the edition is set.
+        """
+        parts = []
+        # event is nullable: dereferencing self.event.name unguarded would
+        # raise AttributeError for rows without an event.
+        if self.event is not None:
+            parts.append(self.event.name)
+        if self.edition is not None:
+            parts.append(str(self.edition))
+        return ' - '.join(parts)
+
+
DOCUMENT_TYPES = (
('a', 'Notice spectacle'),
#<Mots-cles>Musique d'Indonésie</Mots-cles>
keywords = models.ManyToManyField(Keyword) #<Mots-cles>Toraja</Mots-cles>
text = models.TextField(default='')
- #<Classement_Geographique>Indonésie</Classement_Geographique>
- event_type = models.ForeignKey(EventType)#<Type_Manifestation>Danse</Type_Manifestation>
- event_venue = models.ForeignKey(EventVenue, blank=True, null=True)#<Lieu_Manifestation>Le Rond Point, Théâtre Renaud-Barrault, Paris</Lieu_Manifestation>
- event = models.ForeignKey(Event, blank=True, null=True)#<Festival_et_Manifestation>Japon 93</Festival_et_Manifestation>
+ geographic_classification = models.ForeignKey(GeographicalClassification, null=True, blank=True)# <Classement_Geographique>Pérou</Classement_Geographique>
#<Support>Compact Disc Digital Audio</Support>
#<Duree>69'14</Duree>
#<Collection>Le chant du monde</Collection>
return self.title
class Notice(Document):
- pass
+ # Event-related relations carried by notices; each one is optional (NULL allowed).
+ event_type = models.ForeignKey(EventType, blank=True, null=True)  # e.g. <Type_Manifestation>Danse</Type_Manifestation>
+ event_venue = models.ForeignKey(EventVenue, blank=True, null=True)  # e.g. <Lieu_Manifestation>Le Rond Point, Théâtre Renaud-Barrault, Paris</Lieu_Manifestation>
+ event_edition = models.ForeignKey(EventEdition, blank=True, null=True)  # event plus its optional edition number
+
--- /dev/null
+indexation_date not null
+
+* Problèmes à traiter
+** Date de Parution : plusieurs formats
+Nombre d'erreur_date_parution : 886 /
+en général :
+- année : 1996, 2007
+- ou date 10/12/1997
+
+mais aussi :
+- inconnue (-> blank=True en base de données)
+- 2002-2003 (embêtant car ni une date ni une année ...)
+- 2015/09/08 (un seul cas)
+ <Cote>Séminaire2015</Cote>
+ <Titre>Patrimoine culturel immatériel et numérique :\rTransmission, participation, enjeux</Titre>
+ <Doc_no>5570</Doc_no>
+ <Type>a-Notice spectacle</Type>
+ <[record_no]>2280303</[record_no]>
+- 1971/1989 ???? période ? ou deux dates ?
+- 09/2010 , mois + année
+
+Solutions :
+- on laisse un champ string ... (plus possible de trier par date ou par année)
+- on met deux champs : year et date, et on renseigne au moins l'un des deux (certains problèmes restent non résolus)
+
+** Auteurs
+On perd l'information du rôle:
+par exemple :
+ Titre : Cap-Vert. Escales musicales à Cabo Verde. Un week-end festif avec des musiciens et des chanteurs venus de diverses îles de l'archipel.
+
+ Auteurs : Interprète Oliveira, Maria da Luz ; Interprète Manazinha, Nha ; Interprète Mascarenhas, Aldina ; Compositeur Montrond, Joao ; Interprète Luis, Zé ; Interprète Grupo Nos Erança ; Direction musicale Moura, Ana Paula ; Interprète Association Sementera ; Direction musicale Gonçalves, Alfredo
+ ... (Producteur )
+ Cote : CV.04-4469
+** Manifestations:
+On peut avoir des notices de spectacle pour des spectacles sans Manifestation et/ou sans Lieu de manifestation (blank=True, null=True en base de données)
+
+** Manifestations:
+- Edition : nombre entier sauf pour la Cote BR.7-91 : Art populaire du Brésil. De la campagne à la ville, un siècle d'imaginaire brésilien. 4 avril - 18 mai. (Printemps brésilien à Paris. Mars-avril 1987).
+- <No_edition>8-9</No_edition>