From de4b629b3f3f4ac1be182a4360f3ac6fbbde2cdb Mon Sep 17 00:00:00 2001 From: olivier <> Date: Thu, 2 Apr 2009 17:22:52 +0000 Subject: [PATCH] #67: implement OAI-PMH ListIdentifiers method --- telemeta/interop/oai.py | 65 +++++++++++++++++++++++++++++++++++++ telemeta/interop/oaitest.py | 23 +++++++++++++ 2 files changed, 88 insertions(+) diff --git a/telemeta/interop/oai.py b/telemeta/interop/oai.py index 56a0eb47..6617fbf0 100644 --- a/telemeta/interop/oai.py +++ b/telemeta/interop/oai.py @@ -16,6 +16,18 @@ class IDataSource(object): or None if the record doesn't exist""" pass + def count_records(self, from_time = None, until_time = None): + """Must return the number of record identifiers between (optional) from and + until change time.""" + pass + + def list_identifiers(self, offset, limit, from_time = None, until_time = None): + """Must return the list of record identifiers between (optional) from and + until change time, starting from record at offset, with a maximum of limit + entries. Each entry of the list must be a tuple containing the identifier and + the change time. If no record matches, should return an empty list.""" + pass + class ArgumentValidator(object): """OAI-PMH request argument validator""" @@ -66,6 +78,9 @@ class ArgumentValidator(object): for k in self.request: try: all_args.index(k) + if (k == 'set'): + self.response.error('noSetHierarchy') + return False except ValueError: self.response.error('badArgument', 'Invalid argument: %s' % k) return False @@ -100,6 +115,8 @@ class ArgumentValidator(object): class DataProvider(object): """OAI-PMH Data Provider""" + max_records_per_response = 500 + def __init__(self, repository_name, base_url, admin_email): self.identity = { 'repositoryName': repository_name, @@ -114,10 +131,23 @@ class DataProvider(object): """Parse an ISO8601 date string into a datetime object""" return datetime.strptime(str, '%Y-%m-%dT%H-%M-%SZ') + def parse_time_range(self, args): + if args.get('from'): + from_time = self.parse_time(args['from']) + else: + from_time = None + if args.get('until'): + until_time = self.parse_time(args['until']) + else: + until_time = None + + return from_time, until_time + def handle(self, args, datasource): """Handle a request and return the response as a DOM document""" response = Response(self.identity, datasource) + response.max_records_per_response = self.max_records_per_response validator = ArgumentValidator(args, response) validator.accept_format('oai_dc') @@ -132,6 +162,12 @@ class DataProvider(object): elif verb == 'GetRecord': validator.require('identifier', 'metadataPrefix') validator.validate() and response.get_record(args['identifier']) + elif verb == 'ListIdentifiers': + validator.require('metadataPrefix') + validator.optional('from', 'until', 'set', 'resumptionToken') + from_time, until_time = self.parse_time_range(args) + token = args.get('resumptionToken') + validator.validate() and response.list_identifiers(from_time, until_time, token) doc = libxml2.parseDoc(response.doc.toxml(encoding="utf-8")) response.free() @@ -142,6 +178,8 @@ class DataProvider(object): class Response(object): """OAI-PMH response generation""" + max_records_per_response = 500 + def __init__(self, identity, datasource): self.identity = identity self.datasource = datasource @@ -251,6 +289,33 @@ class Response(object): self.set_attributes(self.request, {'identifier': id, 'metadataPrefix': 'oai_dc'}) container = self.root.appendChild(self.doc.createElement(self.verb)) container.appendChild(self.make_record(id, dc, ctime)) + + def list_identifiers(self, from_time, until_time, token = None): + """Append ListIdentifiers result""" + offset = 0 + if token: + try: + offset = int(token) + except ValueError: + self.error('badArgument', 'Incorrect resumption token') + return + + count = self.datasource.count_records(from_time, until_time) + data = self.datasource.list_identifiers(offset, self.max_records_per_response, from_time, until_time) + if (len(data) > self.max_records_per_response): + raise Exception("DataSource.list_identifiers() returned too many records") + + container = self.root.appendChild(self.doc.createElement(self.verb)) + for item in data: + id, ctime = item + container.appendChild(self.make_record_header(id, ctime)) + if count - offset > self.max_records_per_response: + token = self.root.appendChild(self.doc.createElement('resumptionToken')) + token.setAttribute('completeListSize', str(count)) + token.appendChild(self.doc.createTextNode(str(offset + len(data)))) + elif offset: + token = self.root.appendChild(self.doc.createElement('resumptionToken')) + token.setAttribute('completeListSize', str(count)) def free(self): """Free the resources used by this response""" diff --git a/telemeta/interop/oaitest.py b/telemeta/interop/oaitest.py index 707aeb41..7360bd3a 100644 --- a/telemeta/interop/oaitest.py +++ b/telemeta/interop/oaitest.py @@ -21,6 +21,28 @@ class DataSource(object): def get_record(self, id): return self.data.get(id) + def count_records(self, from_time = None, until_time = None): + result = 0 + for k in self.data: + dc, ctime = self.data[k] + if ((not from_time) or ctime >= from_time) and ((not until_time) or ctime <= until_time): + result += 1 + #result.push((k, ctime)) + return result + + def list_identifiers(self, offset, limit, from_time = None, until_time = None): + result = [] + i = 0 + n = 0 + for k in self.data: + dc, ctime = self.data[k] + if ((not from_time) or ctime >= from_time) and ((not until_time) or ctime <= until_time): + if (i >= offset) and (n < limit): + result.append((k, ctime)) + n += 1 + i += 1 + return result + args = {} for item in sys.argv[1:]: k, v = item.split('=') @@ -28,4 +50,5 @@ for item in sys.argv[1:]: datasource = DataSource() provider = DataProvider("Test Provider", "http://test.provider.com", "joe@provider.com") +provider.max_records_per_response = 2 print provider.handle(args, datasource) -- 2.39.5