# 2PageOAI: a small OAI-PMH 2.0 repository server (Python 2).
#
# Usage: python <this file> PORT RECORDS_XML [HOST_ADDRESS [ADMIN_EMAIL]]
#
# Every <record> element found in RECORDS_XML is copied into a temporary file
# and indexed by identifier, datestamp and set membership; requests are then
# answered from that index.
#
# NOTE(review): the XML/HTML markup inside the string literals below was
# reconstructed from the element names defined by the OAI-PMH 2.0
# specification, because the copy of this file under review had lost it
# (leaving format strings whose placeholder counts no longer matched their
# arguments).  Confirm against a pristine copy if one is available.

import sys
import BaseHTTPServer
import calendar
import operator
import re
import tempfile
import time
import urllib
import xml.dom.pulldom
import xml.sax.saxutils

# Maximum number of records (or headers) returned in one list response.
mxR = 100

# Opening of every OAI-PMH response document.
oaiB = ('<?xml version="1.0" encoding="UTF-8"?>\n'
        '<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" '
        'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        'xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ '
        'http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">\n')

# Identify response template: (repository name, host address, admin e-mail).
IDp = ('<repositoryName>%s</repositoryName>\n'
       '<baseURL>http://%s</baseURL>\n'
       '<protocolVersion>2.0</protocolVersion>\n'
       '<adminEmail>mailto:%s</adminEmail>\n'
       '<earliestDatestamp>1980-01-01</earliestDatestamp>\n'
       '<deletedRecord>no</deletedRecord>\n'
       '<granularity>YYYY-MM-DD</granularity>')

# ListMetadataFormats response body: only oai_dc is offered.
LMF = ('<metadataFormat>\n'
       '<metadataPrefix>oai_dc</metadataPrefix>\n'
       '<schema>http://www.openarchives.org/OAI/2.0/oai_dc.xsd</schema>\n'
       '<metadataNamespace>http://www.openarchives.org/OAI/2.0/oai_dc/'
       '</metadataNamespace>\n'
       '</metadataFormat>\n')

# Page served for any URL that does not address the repository.
# NOTE(review): the original link target for "2PageOAI Home" could not be
# recovered; the OCLC Research URL from the licence text is used instead.
defHTML = ('<html><body>\n'
           '<h1>This is a 2PageOAI server</h1>\n'
           '<p>Try "2PageOAI?verb=Identify" as the URL, or '
           '<a href="http://www.oclc.org/research/">2PageOAI Home</a>'
           ' for help.</p>\n'
           '</body></html>')

# Characters that cause a request to be rejected outright.
_FORBIDDEN_CHARS = "<>#%\"{}|\\^[]'"

# Matches ".../2PageOAI?verb=<Verb><rest of query>".
_REQUEST_RE = re.compile(r'/?2PageOAI/?\?verb=([A-Z][A-Za-z]+)(.*)')

# Keys that may legitimately appear inside a resumption token.
_TOKEN_KEYS = ('from', 'until', 'set', 'metadataPrefix', 'next')

_escape = xml.sax.saxutils.escape


def _utf8(text):
    """Return *text* as a UTF-8 encoded byte string (Python 2 ``str``)."""
    if isinstance(text, str):
        return text
    return text.encode('utf-8')


def rtime(v):
    """Convert a ``YYYY-MM-DD[Thh:mm:ssZ]`` datestamp to seconds since the epoch.

    Only the date part is used.  The date is interpreted as UTC so that it
    agrees with the ``time.gmtime`` based formatting used for responses
    (``time.mktime`` would shift datestamps by the local UTC offset).

    Raises ValueError when the date part is not a valid ``YYYY-MM-DD`` date.
    """
    day = v.split('T')[0].strip()
    return calendar.timegm(time.strptime(day, '%Y-%m-%d'))


def pars(oaiargs, **pa):
    """Parse the argument part of a request into a dictionary.

    *oaiargs* is the text following the verb; its first character (normally
    the ``&`` separator) is skipped.  ``from`` and ``until`` values are
    converted with ``rtime``.  A repeated argument is stored under the key
    ``'DUP' + name`` so that the per-verb argument checks reject it.

    Raises ValueError when ``from`` and ``until`` differ in length (i.e. use
    different granularities) or are not valid dates.
    """
    date_len = None
    for pair in oaiargs[1:].split('&'):
        # Split on the first '=' only, so values may themselves contain '='.
        name, value = (pair.split('=', 1) + [''])[:2]
        if not name:
            continue
        if name in ('from', 'until'):
            if date_len is None:
                date_len = len(value)
            elif date_len != len(value):
                raise ValueError('from and until use different granularities')
            value = rtime(value)
        if name in pa:
            name = 'DUP' + name
        pa[name] = value
    return pa


def filt(rli, pa):
    """Return True when record-index entry *rli* satisfies the filters in *pa*.

    *pa* may hold ``set`` (exact set-spec match) and ``from`` / ``until``
    (epoch seconds, inclusive).  A missing ``from`` defaults to 0 and a
    missing ``until`` to the current time.
    """
    if 'set' in pa and pa['set'] not in rli['sets']:
        return False
    return pa.get('from', 0) <= rli['ds'] <= pa.get('until', time.time())


def _make_token(args, next_pos):
    """Encode the list filters in *args* plus the next offset as a token.

    The token has the form ``key:value;key:value;next:N``.
    """
    parts = []
    for key in sorted(args.keys()):
        if key in _TOKEN_KEYS and key != 'next':
            parts.append('%s:%s' % (key, args[key]))
    parts.append('next:%d' % next_pos)
    return ';'.join(parts)


def _parse_token(token):
    """Decode a token produced by ``_make_token`` into an argument dictionary.

    ``from`` / ``until`` come back as floats and ``next`` as a non-negative
    int.  Raises ValueError for anything malformed.
    """
    args = {}
    for part in token.split(';'):
        # Split on the first ':' only: set specs may contain ':' themselves.
        pieces = part.split(':', 1)
        if len(pieces) != 2 or pieces[0] not in _TOKEN_KEYS:
            raise ValueError('malformed resumption token')
        key, value = pieces
        if key in args:
            raise ValueError('duplicate key in resumption token')
        if key in ('from', 'until'):
            value = float(value)
        elif key == 'next':
            value = int(value)
        args[key] = value
    if args.get('next', -1) < 0:
        raise ValueError('resumption token has no valid offset')
    return args


class ReqHand(BaseHTTPServer.BaseHTTPRequestHandler):
    """HTTP request handler answering OAI-PMH verbs.

    The serving ``HTTPServer`` instance must carry these attributes:
    ``rLst`` (record index built by ``loadRecordList``), ``tf`` (file holding
    the serialized records), ``hostName`` and ``keepGoing``.

    A verb ``X`` is dispatched to the method named ``vX``; anything else goes
    to ``vUn``.
    """

    def getTR(self, pos):
        """Return the serialized record stored at *pos* = (offset, length)."""
        store = self.server.tf
        store.seek(pos[0])
        return store.read(pos[1])

    def log_message(self, format, *args):
        """Suppress per-request logging (otherwise headers go to stderr)."""
        pass

    def wBase(self, tt, r):
        """Send body *r* as a 200 response of type ``text/<tt>``."""
        self.send_response(200)
        self.send_header('Content-Type', 'text/%s; charset=UTF-8' % tt)
        self.send_header('Content-Length', len(r))
        self.end_headers()
        self.wfile.write(r)

    def wOAI(self, verb, response):
        """Wrap *response* in the OAI-PMH envelope and send it.

        An empty *verb* produces an envelope without a verb element, which is
        the form used for error responses.
        """
        stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        host = _escape(self.server.hostName)
        r = "%s<responseDate>%s</responseDate>\n" % (oaiB, stamp)
        if verb:
            r += ('<request verb="%s">http://%s</request>\n'
                  '<%s>\n%s\n</%s>\n</OAI-PMH>'
                  % (verb, host, verb, response, verb))
        else:
            r += ('<request>http://%s</request>\n%s\n</OAI-PMH>'
                  % (host, response))
        self.wBase('xml', r)

    def wError(self, err, args=''):
        """Send an OAI-PMH error with code *err*; *args* becomes its text."""
        self.wOAI('', '<error code="%s">%s</error>'
                  % (err, _escape(str(args))))

    def vUn(self, pa):
        """Handle an unknown or missing verb."""
        self.wError('badVerb', pa)

    def vIdentify(self, pa):
        """Handle Identify, which takes no arguments."""
        if pa:
            return self.wError('badArgument')
        self.wOAI('Identify', ID)

    def vListMetadataFormats(self, pa):
        """Handle ListMetadataFormats (optional ``identifier`` argument)."""
        if len(pa) > 1 or not AinL(pa, 'identifier'):
            return self.wError('badArgument')
        if 'identifier' in pa:
            known = [rli['id'] for rli in self.server.rLst]
            if pa['identifier'] not in known:
                return self.wError('idDoesNotExist')
        self.wOAI('ListMetadataFormats', LMF)

    def vGetRecord(self, pa):
        """Handle GetRecord (``identifier`` and ``metadataPrefix`` required)."""
        wanted = ('metadataPrefix', 'identifier')
        if not (LinA(pa, *wanted) and AinL(pa, *wanted)):
            return self.wError('badArgument')
        if pa['metadataPrefix'] != 'oai_dc':
            return self.wError('cannotDisseminateFormat')
        for rli in self.server.rLst:
            if rli['id'] == pa['identifier']:
                # Answer with the first match only: one request, one response.
                return self.wOAI('GetRecord', self.getTR(rli['pos']))
        self.wError('idDoesNotExist', pa['identifier'])

    def _header(self, rli):
        """Return the <header> element for record-index entry *rli*."""
        sets = ''.join(['<setSpec>%s</setSpec>' % _escape(s)
                        for s in rli['sets']])
        stamp = time.strftime("%Y-%m-%d", time.gmtime(rli['ds']))
        return ('<header>\n<identifier>%s</identifier>\n'
                '<datestamp>%s</datestamp>\n%s</header>'
                % (_escape(rli['id']), stamp, sets))

    def Lister(self, pa, listVerb):
        """Shared implementation of ListRecords and ListIdentifiers.

        Returns at most ``mxR`` items per response and appends a resumption
        token when more remain.  The token carries the original filters, so
        a resumed request selects from the same record list.
        """
        if not AinL(pa, 'from', 'until', 'set', 'resumptionToken',
                    'metadataPrefix'):
            return self.wError('badArgument')
        if 'resumptionToken' in pa:
            if len(pa) > 1:
                return self.wError('badArgument')
            try:
                prt = _parse_token(pa['resumptionToken'])
            except ValueError:
                return self.wError('badResumptionToken')
            ps = prt['next']
        else:
            if not pa.get('metadataPrefix', ''):
                return self.wError('badArgument')
            if pa['metadataPrefix'] != 'oai_dc':
                return self.wError('cannotDisseminateFormat')
            prt, ps = pa, 0
        # Filter with the effective arguments (decoded token or request).
        rlis = [rli for rli in self.server.rLst if filt(rli, prt)][ps:]
        if not rlis:
            return self.wError('noRecordsMatch')
        page = rlis[:mxR]
        if listVerb == 'ListIdentifiers':
            r = [self._header(rli) for rli in page]
        else:
            r = [self.getTR(rli['pos']) for rli in page]
        if len(rlis) > mxR:
            tok = _make_token(prt, ps + mxR)
            r.append('<resumptionToken>%s</resumptionToken>' % _escape(tok))
        self.wOAI(listVerb, '\n'.join(r))

    def vListRecords(self, pa):
        """Handle ListRecords."""
        self.Lister(pa, 'ListRecords')

    def vListIdentifiers(self, pa):
        """Handle ListIdentifiers."""
        self.Lister(pa, 'ListIdentifiers')

    def vListSets(self, pa, **sd):
        """Handle ListSets: list every distinct set spec in the record index."""
        if 'resumptionToken' in pa:
            return self.wError('badResumptionToken')
        if pa:
            return self.wError('badArgument', list(pa.keys())[0])
        for rli in self.server.rLst:
            for ss in rli['sets']:
                sd[ss] = 1
        if not sd:
            return self.wError('noSetHierarchy')
        names = list(sd.keys())
        names.sort()
        entries = []
        for k in names:
            spec = _escape(k)
            entries.append('<set><setSpec>%s</setSpec>'
                           '<setName>%s</setName></set>' % (spec, spec))
        self.wOAI('ListSets', '\n'.join(entries))

    def vSTOP(self, pa):
        """Handle the non-standard STOP verb: shut the server down.

        NOTE(review): any client can issue this request; there is no
        authentication.  Restrict access if the server is exposed.
        """
        self.wBase('plain', 'STOPPING')
        self.server.keepGoing = 0

    def do_GET(self):
        """Validate the request URL and dispatch to the verb handler."""
        uqpath = urllib.unquote(self.path)
        for c in _FORBIDDEN_CHARS:
            if c in uqpath:
                return self.wError('badArgument')
        sys.stdout.write(uqpath + '\n')
        if uqpath.find('2PageOAI') not in (0, 1):
            return self.wBase('html', defHTML)
        mo = _REQUEST_RE.search(uqpath)
        if not mo:
            return self.vUn({})
        try:
            parsedArgs = pars(mo.group(2))
        except (ValueError, OverflowError):
            return self.wError('badArgument')
        handler = ReqHand.__dict__.get('v' + mo.group(1), ReqHand.vUn)
        handler(self, parsedArgs)

    def do_POST(self):
        """Treat the POST body as the query string and handle it like a GET."""
        try:
            length = int(self.headers.get('Content-Length', 0))
        except ValueError:
            return self.wError('badArgument')
        if length < 0:
            return self.wError('badArgument')
        self.path = self.path + '2PageOAI/?' + self.rfile.read(length)
        self.do_GET()


def getText(p):
    """Return the concatenated text of the direct text-node children of *p*."""
    return ''.join([n.data for n in p.childNodes
                    if n.nodeType == n.TEXT_NODE])


def LinA(A, *L):
    """Return a true value when every name in *L* is a key of *A*."""
    for x in L:
        if x not in A:
            return False
    return True


def AinL(A, *L):
    """Return a true value when every key of *A* is one of the names in *L*."""
    for x in A.keys():
        if x not in L:
            return False
    return True


def saveNode(tf, ns):
    """Append byte string *ns* to file *tf*; return its (offset, length)."""
    tf.seek(0, 2)  # always append, wherever earlier reads left the position
    pos = tf.tell()
    tf.write(ns)
    return (pos, len(ns))


def loadRecordList(fileName, tf, recList):
    """Index every <record> element of XML file *fileName*.

    Each record is serialized as UTF-8 into *tf*, and a dictionary with keys
    ``pos`` (offset, length in *tf*), ``ds`` (datestamp as epoch seconds),
    ``sets`` (list of set specs) and ``id`` (identifier) is appended to
    *recList*.  Identifier and set specs are stored UTF-8 encoded so they
    compare equal to request arguments and can be written to the socket.
    A running count is printed every 100 records.
    """
    events = xml.dom.pulldom.parse(fileName)
    for (event, node) in events:
        if event == 'START_ELEMENT' and node.tagName == 'record':
            events.expandNode(node)
            hd = node.getElementsByTagName("header")[0]
            recList.append({
                'pos': saveNode(tf, node.toxml().encode('utf-8')),
                'ds': rtime(getText(hd.getElementsByTagName("datestamp")[0])),
                'sets': [_utf8(getText(s))
                         for s in hd.getElementsByTagName("setSpec")],
                'id': _utf8(getText(
                    hd.getElementsByTagName("identifier")[0])),
            })
            if len(recList) % 100 == 0:
                sys.stdout.write('\r%d ' % len(recList))
                sys.stdout.flush()


if len(sys.argv) < 3:
    sys.stderr.write('usage: %s PORT RECORDS_XML [HOST_ADDRESS [ADMIN_EMAIL]]\n'
                     % sys.argv[0])
    sys.exit(2)

repos = BaseHTTPServer.HTTPServer(('', int(sys.argv[1])), ReqHand)
repos.keepGoing, repos.rLst, repos.tf = 1, [], tempfile.TemporaryFile()
loadRecordList(sys.argv[2], repos.tf, repos.rLst)
if len(sys.argv) >= 4 and sys.argv[3]:
    repos.hostName = sys.argv[3]
else:
    repos.hostName = 'UnknownHostAddress'
if len(sys.argv) >= 5 and sys.argv[4]:
    adminEmail = sys.argv[4]
else:
    adminEmail = 'email'
ID = IDp % ('2PageOAI', _escape(repos.hostName), _escape(adminEmail))
try:
    # Serve one request at a time until the STOP verb clears keepGoing.
    while repos.keepGoing:
        repos.handle_request()
finally:
    repos.server_close()
    repos.tf.close()

"""Copyright (c) 2000-2003 OCLC Online Computer Library Center, Inc. and other contributors. All rights reserved. The contents of this file, as updated from time to time by OCLC Research are subject to OCLC Research Public License Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a current copy of the License at http://purl.org/oclc/research/ORPL/. Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. This software consists of voluntary contributions made by many individuals on behalf of OCLC Research. For more information on OCLC Research, please see http://www.oclc.org/research/. This is the Original Code. The Initial Developers of the Original Code are Thomas Hickey (mailto:hickey@oclc.org) and Jenny Toves. Portions created by OCLC are Copyright (C) 2003. All Rights Reserved. (version: 2003 August 8)"""