--- /dev/null
- * Stub for future feature.
+/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
+
+/**
+ * This is the main entry point for loading a PDF and interacting with it.
+ * NOTE: If a URL is used to fetch the PDF data a standard XMLHttpRequest(XHR)
+ * is used, which means it must follow the same origin rules that any XHR does
+ * e.g. No cross domain requests without CORS.
+ *
+ * @param {string|TypedAray} source Either a url to a PDF is located or a
+ * typed array already populated with data.
+ * @return {Promise} A promise that is resolved with {PDFDocumentProxy} object.
+ */
+PDFJS.getDocument = function getDocument(source) {
+ var promise = new PDFJS.Promise();
+ var transport = new WorkerTransport(promise);
+ if (typeof source === 'string') {
+ // fetch url
+ PDFJS.getPdf(
+ {
+ url: source,
+ progress: function getPDFProgress(evt) {
+ if (evt.lengthComputable)
+ promise.progress({
+ loaded: evt.loaded,
+ total: evt.total
+ });
+ },
+ error: function getPDFError(e) {
+ promise.reject('Unexpected server response of ' +
+ e.target.status + '.');
+ }
+ },
+ function getPDFLoad(data) {
+ transport.sendData(data);
+ });
+ } else {
+ // assuming the source is array, instantiating directly from it
+ transport.sendData(source);
+ }
+ return promise;
+};
+
+/**
+ * Proxy to a PDFDocument in the worker thread. Also, contains commonly used
+ * properties that can be read synchronously.
+ */
+var PDFDocumentProxy = (function() {
+ function PDFDocumentProxy(pdfInfo, transport) {
+ this.pdfInfo = pdfInfo;
+ this.transport = transport;
+ }
+ PDFDocumentProxy.prototype = {
+ /**
+ * @return {number} Total number of pages the PDF contains.
+ */
+ get numPages() {
+ return this.pdfInfo.numPages;
+ },
+ /**
+ * @return {string} A unique ID to identify a PDF. Not guaranteed to be
+ * unique.
+ */
+ get fingerprint() {
+ return this.pdfInfo.fingerprint;
+ },
+ /**
+ * @param {number} The page number to get. The first page is 1.
+ * @return {Promise} A promise that is resolved with a {PDFPageProxy}
+ * object.
+ */
+ getPage: function(number) {
+ return this.transport.getPage(number);
+ },
+ /**
+ * @return {Promise} A promise that is resolved with a lookup table for
+ * mapping named destinations to reference numbers.
+ */
+ getDestinations: function() {
+ var promise = new PDFJS.Promise();
+ var destinations = this.pdfInfo.destinations;
+ promise.resolve(destinations);
+ return promise;
+ },
+ /**
+ * @return {Promise} A promise that is resolved with an {array} that is a
+ * tree outline (if it has one) of the PDF. The tree is in the format of:
+ * [
+ * {
+ * title: string,
+ * bold: boolean,
+ * italic: boolean,
+ * color: rgb array,
+ * dest: dest obj,
+ * items: array of more items like this
+ * },
+ * ...
+ * ].
+ */
+ getOutline: function() {
+ var promise = new PDFJS.Promise();
+ var outline = this.pdfInfo.outline;
+ promise.resolve(outline);
+ return promise;
+ },
+ /**
+ * @return {Promise} A promise that is resolved with an {object} that has
+ * info and metadata properties. Info is an {object} filled with anything
+ * available in the information dictionary and similarly metadata is a
+ * {Metadata} object with information from the metadata section of the PDF.
+ */
+ getMetadata: function() {
+ var promise = new PDFJS.Promise();
+ var info = this.pdfInfo.info;
+ var metadata = this.pdfInfo.metadata;
+ promise.resolve({
+ info: info,
+ metadata: metadata ? new PDFJS.Metadata(metadata) : null
+ });
+ return promise;
+ },
+ destroy: function() {
+ this.transport.destroy();
+ }
+ };
+ return PDFDocumentProxy;
+})();
+
+var PDFPageProxy = (function PDFPageProxyClosure() {
+ function PDFPageProxy(pageInfo, transport) {
+ this.pageInfo = pageInfo;
+ this.transport = transport;
+ this.stats = new StatTimer();
+ this.stats.enabled = !!globalScope.PDFJS.enableStats;
+ this.objs = transport.objs;
+ this.renderInProgress = false;
+ }
+ PDFPageProxy.prototype = {
+ /**
+ * @return {number} Page number of the page. First page is 1.
+ */
+ get pageNumber() {
+ return this.pageInfo.pageIndex + 1;
+ },
+ /**
+ * @return {number} The number of degrees the page is rotated clockwise.
+ */
+ get rotate() {
+ return this.pageInfo.rotate;
+ },
+ /**
+ * @return {object} The reference that points to this page. It has 'num' and
+ * 'gen' properties.
+ */
+ get ref() {
+ return this.pageInfo.ref;
+ },
+ /**
+ * @return {array} An array of the visible portion of the PDF page in the
+ * user space units - [x1, y1, x2, y2].
+ */
+ get view() {
+ return this.pageInfo.view;
+ },
+ /**
+ * @param {number} scale The desired scale of the viewport.
+ * @param {number} rotate Degrees to rotate the viewport. If omitted this
+ * defaults to the page rotation.
+ * @return {PageViewport} Contains 'width' and 'height' properties along
+ * with transforms required for rendering.
+ */
+ getViewport: function(scale, rotate) {
+ if (arguments.length < 2)
+ rotate = this.rotate;
+ return new PDFJS.PageViewport(this.view, scale, rotate, 0, 0);
+ },
+ /**
+ * @return {Promise} A promise that is resolved with an {array} of the
+ * annotation objects.
+ */
+ getAnnotations: function() {
+ if (this.annotationsPromise)
+ return this.annotationsPromise;
+
+ var promise = new PDFJS.Promise();
+ this.annotationsPromise = promise;
+ this.transport.getAnnotations(this.pageInfo.pageIndex);
+ return promise;
+ },
+ /**
+ * Begins the process of rendering a page to the desired context.
+ * @param {object} params A parameter object that supports:
+ * {
+ * canvasContext(required): A 2D context of a DOM Canvas object.,
+ * textLayer(optional): An object that has beginLayout, endLayout, and
+ * appendText functions.
+ * }.
+ * @return {Promise} A promise that is resolved when the page finishes
+ * rendering.
+ */
+ render: function(params) {
+ this.renderInProgress = true;
+
+ var promise = new Promise();
+ var stats = this.stats;
+ stats.time('Overall');
+ // If there is no displayReadyPromise yet, then the operatorList was never
+ // requested before. Make the request and create the promise.
+ if (!this.displayReadyPromise) {
+ this.displayReadyPromise = new Promise();
+ this.destroyed = false;
+
+ this.stats.time('Page Request');
+ this.transport.messageHandler.send('RenderPageRequest', {
+ pageIndex: this.pageNumber - 1
+ });
+ }
+
+ var self = this;
+ function complete(error) {
+ self.renderInProgress = false;
+ if (self.destroyed) {
+ delete self.operatorList;
+ delete self.displayReadyPromise;
+ }
+
+ if (error)
+ promise.reject(error);
+ else
+ promise.resolve();
+ };
+
+ // Once the operatorList and fonts are loaded, do the actual rendering.
+ this.displayReadyPromise.then(
+ function pageDisplayReadyPromise() {
+ if (self.destroyed) {
+ complete();
+ return;
+ }
+
+ var gfx = new CanvasGraphics(params.canvasContext,
+ this.objs, params.textLayer);
+ try {
+ this.display(gfx, params.viewport, complete);
+ } catch (e) {
+ complete(e);
+ }
+ }.bind(this),
+ function pageDisplayReadPromiseError(reason) {
+ complete(reason);
+ }
+ );
+
+ return promise;
+ },
+ /**
+ * For internal use only.
+ */
+ startRenderingFromOperatorList:
+ function PDFPageWrapper_startRenderingFromOperatorList(operatorList,
+ fonts) {
+ var self = this;
+ this.operatorList = operatorList;
+
+ var displayContinuation = function pageDisplayContinuation() {
+ // Always defer call to display() to work around bug in
+ // Firefox error reporting from XHR callbacks.
+ setTimeout(function pageSetTimeout() {
+ self.displayReadyPromise.resolve();
+ });
+ };
+
+ this.ensureFonts(fonts,
+ function pageStartRenderingFromOperatorListEnsureFonts() {
+ displayContinuation();
+ }
+ );
+ },
+ /**
+ * For internal use only.
+ */
+ ensureFonts: function PDFPageWrapper_ensureFonts(fonts, callback) {
+ this.stats.time('Font Loading');
+ // Convert the font names to the corresponding font obj.
+ for (var i = 0, ii = fonts.length; i < ii; i++) {
+ fonts[i] = this.objs.objs[fonts[i]].data;
+ }
+
+ // Load all the fonts
+ FontLoader.bind(
+ fonts,
+ function pageEnsureFontsFontObjs(fontObjs) {
+ this.stats.timeEnd('Font Loading');
+
+ callback.call(this);
+ }.bind(this)
+ );
+ },
+ /**
+ * For internal use only.
+ */
+ display: function PDFPageWrapper_display(gfx, viewport, callback) {
+ var stats = this.stats;
+ stats.time('Rendering');
+
+ gfx.beginDrawing(viewport);
+
+ var startIdx = 0;
+ var length = this.operatorList.fnArray.length;
+ var operatorList = this.operatorList;
+ var stepper = null;
+ if (PDFJS.pdfBug && StepperManager.enabled) {
+ stepper = StepperManager.create(this.pageNumber - 1);
+ stepper.init(operatorList);
+ stepper.nextBreakPoint = stepper.getNextBreakPoint();
+ }
+
+ var self = this;
+ function next() {
+ startIdx =
+ gfx.executeOperatorList(operatorList, startIdx, next, stepper);
+ if (startIdx == length) {
+ gfx.endDrawing();
+ stats.timeEnd('Rendering');
+ stats.timeEnd('Overall');
+ if (callback) callback();
+ }
+ }
+ next();
+ },
+ /**
- var textContent = 'page text'; // not implemented
- promise.resolve(textContent);
++ * @return {Promise} That is resolved with the a {string} that is the text
++ * content from the page.
+ */
+ getTextContent: function() {
+ var promise = new PDFJS.Promise();
++ this.transport.messageHandler.send('GetTextContent', {
++ pageIndex: this.pageNumber - 1
++ },
++ function textContentCallback(textContent) {
++ promise.resolve(textContent);
++ }
++ );
+ return promise;
+ },
+ /**
+ * Stub for future feature.
+ */
+ getOperationList: function() {
+ var promise = new PDFJS.Promise();
+ var operationList = { // not implemented
+ dependencyFontsID: null,
+ operatorList: null
+ };
+ promise.resolve(operationList);
+ return promise;
+ },
+ /**
+ * Destroys resources allocated by the page.
+ */
+ destroy: function() {
+ this.destroyed = true;
+
+ if (!this.renderInProgress) {
+ delete this.operatorList;
+ delete this.displayReadyPromise;
+ }
+ }
+ };
+ return PDFPageProxy;
+})();
+/**
+ * For internal use only.
+ *
+ * Connects the API objects in this file to the PDF worker through a
+ * MessageHandler. A real web worker is used when possible; otherwise a fake
+ * worker that runs on the main thread is set up instead.
+ */
+var WorkerTransport = (function WorkerTransportClosure() {
+ function WorkerTransport(promise) {
+ this.workerReadyPromise = promise;
+ this.objs = new PDFObjects();
+
+ this.pageCache = [];
+ this.pagePromises = [];
+ this.fontsLoading = {};
+
+ // If worker support isn't explicitly disabled and the browser has worker
+ // support, create a new web worker and test if it/the browser fulfills
+ // all requirements to run parts of pdf.js in a web worker.
+ // Right now, the requirement is that a Uint8Array is still a Uint8Array
+ // when it arrives on the worker. Chrome added this with version 15.
+ if (!globalScope.PDFJS.disableWorker && typeof Worker !== 'undefined') {
+ var workerSrc = PDFJS.workerSrc;
+ if (typeof workerSrc === 'undefined') {
+ error('No PDFJS.workerSrc specified');
+ }
+
+ try {
+ var worker;
+ if (PDFJS.isFirefoxExtension) {
+ // The firefox extension can't load the worker from the resource://
+ // url so we have to inline the script and then use the blob loader.
+ var bb = new MozBlobBuilder();
+ bb.append(document.querySelector('#PDFJS_SCRIPT_TAG').textContent);
+ var blobUrl = window.URL.createObjectURL(bb.getBlob());
+ worker = new Worker(blobUrl);
+ } else {
+ // Some versions of FF can't create a worker on localhost, see:
+ // https://bugzilla.mozilla.org/show_bug.cgi?id=683280
+ worker = new Worker(workerSrc);
+ }
+
+ var messageHandler = new MessageHandler('main', worker);
+ this.messageHandler = messageHandler;
+
+ messageHandler.on('test', function transportTest(supportTypedArray) {
+ if (supportTypedArray) {
+ this.worker = worker;
+ this.setupMessageHandler(messageHandler);
+ } else {
+ globalScope.PDFJS.disableWorker = true;
+ this.setupFakeWorker();
+ }
+ }.bind(this));
+
+ var testObj = new Uint8Array(1);
+ // Some versions of Opera throw a DATA_CLONE_ERR on
+ // serializing the typed array.
+ messageHandler.send('test', testObj);
+ return;
+ } catch (e) {
+ warn('The worker has been disabled.');
+ }
+ }
+ // Either workers are disabled, not supported or have thrown an exception.
+ // Thus, we fall back to a fake worker.
+ globalScope.PDFJS.disableWorker = true;
+ this.setupFakeWorker();
+ }
+ WorkerTransport.prototype = {
+ destroy: function WorkerTransport_destroy() {
+ if (this.worker)
+ this.worker.terminate();
+
+ this.pageCache = [];
+ this.pagePromises = [];
+ },
+ setupFakeWorker: function WorkerTransport_setupFakeWorker() {
+ // If we don't use a worker, just post/sendMessage to the main thread.
+ var fakeWorker = {
+ postMessage: function WorkerTransport_postMessage(obj) {
+ fakeWorker.onmessage({data: obj});
+ },
+ terminate: function WorkerTransport_terminate() {}
+ };
+
+ var messageHandler = new MessageHandler('main', fakeWorker);
+ this.setupMessageHandler(messageHandler);
+
+ // If the main thread is our worker, set up the handling for the messages
+ // the main thread sends to itself.
+ WorkerMessageHandler.setup(messageHandler);
+ },
+
+ setupMessageHandler:
+ function WorkerTransport_setupMessageHandler(messageHandler) {
+ this.messageHandler = messageHandler;
+
+ messageHandler.on('GetDoc', function transportDoc(data) {
+ var pdfInfo = data.pdfInfo;
+ var pdfDocument = new PDFDocumentProxy(pdfInfo, this);
+ this.pdfDocument = pdfDocument;
+ this.workerReadyPromise.resolve(pdfDocument);
+ }, this);
+
+ messageHandler.on('GetPage', function transportPage(data) {
+ var pageInfo = data.pageInfo;
+ var page = new PDFPageProxy(pageInfo, this);
+ this.pageCache[pageInfo.pageIndex] = page;
+ var promise = this.pagePromises[pageInfo.pageIndex];
+ promise.resolve(page);
+ }, this);
+
+ messageHandler.on('GetAnnotations', function transportAnnotations(data) {
+ var annotations = data.annotations;
+ var promise = this.pageCache[data.pageIndex].annotationsPromise;
+ promise.resolve(annotations);
+ }, this);
+
+ messageHandler.on('RenderPage', function transportRender(data) {
+ var page = this.pageCache[data.pageIndex];
+ var depFonts = data.depFonts;
+
+ page.stats.timeEnd('Page Request');
+ page.startRenderingFromOperatorList(data.operatorList, depFonts);
+ }, this);
+
+ messageHandler.on('obj', function transportObj(data) {
+ var id = data[0];
+ var type = data[1];
+ if (this.objs.hasData(id))
+ return;
+
+ switch (type) {
+ case 'JpegStream':
+ var imageData = data[2];
+ loadJpegStream(id, imageData, this.objs);
+ break;
+ case 'Image':
+ var imageData = data[2];
+ this.objs.resolve(id, imageData);
+ break;
+ case 'Font':
+ var name = data[2];
+ var file = data[3];
+ var properties = data[4];
+
+ if (file) {
+ // Rewrap the ArrayBuffer in a stream.
+ var fontFileDict = new Dict();
+ file = new Stream(file, 0, file.length, fontFileDict);
+ }
+
+ // At this point, only the font object is created but the font is
+ // not yet attached to the DOM. This is done in `FontLoader.bind`.
+ var font = new Font(name, file, properties);
+ this.objs.resolve(id, font);
+ break;
+ default:
+ error('Got unkown object type ' + type);
+ }
+ }, this);
+
+ messageHandler.on('PageError', function transportError(data) {
+ var page = this.pageCache[data.pageNum - 1];
+ if (page.displayReadyPromise)
+ page.displayReadyPromise.reject(data.error);
+ else
+ error(data.error);
+ }, this);
+
+ messageHandler.on('JpegDecode', function(data, promise) {
+ var imageData = data[0];
+ var components = data[1];
+ if (components != 3 && components != 1)
+ error('Only 3 component or 1 component can be returned');
+
+ var img = new Image();
+ img.onload = (function messageHandler_onloadClosure() {
+ var width = img.width;
+ var height = img.height;
+ var size = width * height;
+ var rgbaLength = size * 4;
+ var buf = new Uint8Array(size * components);
+ var tmpCanvas = createScratchCanvas(width, height);
+ var tmpCtx = tmpCanvas.getContext('2d');
+ tmpCtx.drawImage(img, 0, 0);
+ var data = tmpCtx.getImageData(0, 0, width, height).data;
+
+ if (components == 3) {
+ for (var i = 0, j = 0; i < rgbaLength; i += 4, j += 3) {
+ buf[j] = data[i];
+ buf[j + 1] = data[i + 1];
+ buf[j + 2] = data[i + 2];
+ }
+ } else if (components == 1) {
+ for (var i = 0, j = 0; i < rgbaLength; i += 4, j++) {
+ buf[j] = data[i];
+ }
+ }
+ promise.resolve({ data: buf, width: width, height: height});
+ }).bind(this);
+ var src = 'data:image/jpeg;base64,' + window.btoa(imageData);
+ img.src = src;
+ });
+ },
+
+ sendData: function WorkerTransport_sendData(data) {
+ this.messageHandler.send('GetDocRequest', data);
+ },
+
+ getPage: function WorkerTransport_getPage(pageNumber, promise) {
+ var pageIndex = pageNumber - 1;
+ if (pageIndex in this.pagePromises)
+ return this.pagePromises[pageIndex];
+ var promise = new PDFJS.Promise('Page ' + pageNumber); // overwrites the 'promise' parameter, which is never read
+ this.pagePromises[pageIndex] = promise;
+ this.messageHandler.send('GetPageRequest', { pageIndex: pageIndex });
+ return promise;
+ },
+
+ getAnnotations: function WorkerTransport_getAnnotations(pageIndex) {
+ this.messageHandler.send('GetAnnotationsRequest',
+ { pageIndex: pageIndex });
+ }
+ };
+ return WorkerTransport;
+
+})();
var resources = this.resources;
if (isArray(content)) {
// fetching items
+ var streams = [];
var i, n = content.length;
+ var streams = [];
for (i = 0; i < n; ++i)
streams.push(xref.fetchIfRef(content[i]));
content = new StreamsSequenceStream(streams);
var pe = this.pe = new PartialEvaluator(
xref, handler, 'p' + this.pageNumber + '_');
- this.operatorList = pe.getOperatorList(content, resources, dependency);
- this.stats.timeEnd('Build IR Queue');
- return this.operatorList;
+ return pe.getOperatorList(content, resources, dependency);
},
-
+ extractTextContent: function Page_extractTextContent() {
+ if ('textContent' in this) {
+ // text content was extracted
+ return this.textContent;
+ }
+
+ var handler = {
+ on: function nullHandlerOn() {},
+ send: function nullHandlerSend() {}
+ };
+
+ var xref = this.xref;
+ var content = xref.fetchIfRef(this.content);
+ var resources = xref.fetchIfRef(this.resources);
+ if (isArray(content)) {
+ // fetching items
+ var i, n = content.length;
+ var streams = [];
+ for (i = 0; i < n; ++i)
+ streams.push(xref.fetchIfRef(content[i]));
+ content = new StreamsSequenceStream(streams);
+ } else if (isStream(content))
+ content.reset();
+
+ var pe = new PartialEvaluator(
+ xref, handler, 'p' + this.pageNumber + '_');
+ var text = pe.getTextContent(content, resources);
+ return (this.textContent = text);
+ },
-
- display: function Page_display(gfx, callback) {
- var stats = this.stats;
- stats.time('Rendering');
- var xref = this.xref;
- var resources = this.resources;
- var mediaBox = this.mediaBox;
- assertWellFormed(isDict(resources), 'invalid page resources');
-
- gfx.xref = xref;
- gfx.res = resources;
- gfx.beginDrawing({ x: mediaBox[0], y: mediaBox[1],
- width: this.width,
- height: this.height,
- rotate: this.rotate });
-
- var startIdx = 0;
- var length = this.operatorList.fnArray.length;
- var operatorList = this.operatorList;
- var stepper = null;
- if (PDFJS.pdfBug && StepperManager.enabled) {
- stepper = StepperManager.create(this.pageNumber);
- stepper.init(operatorList);
- stepper.nextBreakPoint = stepper.getNextBreakPoint();
- }
-
- var self = this;
- function next() {
- startIdx =
- gfx.executeOperatorList(operatorList, startIdx, next, stepper);
- if (startIdx == length) {
- gfx.endDrawing();
- stats.timeEnd('Rendering');
- stats.timeEnd('Overall');
- if (callback) callback();
- }
- }
- next();
- },
- rotatePoint: function Page_rotatePoint(x, y, reverse) {
- var rotate = reverse ? (360 - this.rotate) : this.rotate;
- switch (rotate) {
- case 180:
- return {x: this.width - x, y: y};
- case 90:
- return {x: this.width - y, y: this.height - x};
- case 270:
- return {x: y, y: x};
- case 360:
- case 0:
- default:
- return {x: x, y: this.height - y};
- }
- },
+ ensureFonts: function Page_ensureFonts(fonts, callback) {
+ this.stats.time('Font Loading');
+ // Convert the font names to the corresponding font obj.
+ for (var i = 0, ii = fonts.length; i < ii; i++) {
+ fonts[i] = this.objs.objs[fonts[i]].data;
+ }
+
+ // Load all the fonts
+ FontLoader.bind(
+ fonts,
+ function pageEnsureFontsFontObjs(fontObjs) {
+ this.stats.timeEnd('Font Loading');
+
+ callback.call(this);
+ }.bind(this)
+ );
+ },
getLinks: function Page_getLinks() {
var links = [];
var annotations = pageGetAnnotations();
}
};
- return PDFDocModel;
+ return PDFDocument;
})();
-var PDFDoc = (function PDFDocClosure() {
- function PDFDoc(arg, callback) {
- var stream = null;
- var data = null;
-
- if (isStream(arg)) {
- stream = arg;
- data = arg.bytes;
- } else if (isArrayBuffer(arg)) {
- stream = new Stream(arg);
- data = arg;
- } else {
- error('PDFDoc: Unknown argument type');
- }
-
- this.data = data;
- this.stream = stream;
- this.pdfModel = new PDFDocModel(stream);
- this.fingerprint = this.pdfModel.getFingerprint();
- this.info = this.pdfModel.getDocumentInfo();
- this.catalog = this.pdfModel.catalog;
- this.objs = new PDFObjects();
-
- this.pageCache = [];
- this.fontsLoading = {};
- this.workerReadyPromise = new Promise('workerReady');
-
- this.pageText = [];
- this.startedTextExtraction = false;
-
- // If worker support isn't disabled explicit and the browser has worker
- // support, create a new web worker and test if it/the browser fullfills
- // all requirements to run parts of pdf.js in a web worker.
- // Right now, the requirement is, that an Uint8Array is still an Uint8Array
- // as it arrives on the worker. Chrome added this with version 15.
- if (!globalScope.PDFJS.disableWorker && typeof Worker !== 'undefined') {
- var workerSrc = PDFJS.workerSrc;
- if (typeof workerSrc === 'undefined') {
- error('No PDFJS.workerSrc specified');
- }
-
- try {
- var worker;
- if (PDFJS.isFirefoxExtension) {
- // The firefox extension can't load the worker from the resource://
- // url so we have to inline the script and then use the blob loader.
- var bb = new MozBlobBuilder();
- bb.append(document.querySelector('#PDFJS_SCRIPT_TAG').textContent);
- var blobUrl = window.URL.createObjectURL(bb.getBlob());
- worker = new Worker(blobUrl);
- } else {
- // Some versions of FF can't create a worker on localhost, see:
- // https://bugzilla.mozilla.org/show_bug.cgi?id=683280
- worker = new Worker(workerSrc);
- }
-
- var messageHandler = new MessageHandler('main', worker);
-
- messageHandler.on('test', function pdfDocTest(supportTypedArray) {
- if (supportTypedArray) {
- this.worker = worker;
- this.setupMessageHandler(messageHandler);
- } else {
- globalScope.PDFJS.disableWorker = true;
- this.setupFakeWorker();
- }
- }.bind(this));
-
- var testObj = new Uint8Array(1);
- // Some versions of Opera throw a DATA_CLONE_ERR on
- // serializing the typed array.
- messageHandler.send('test', testObj);
- return;
- } catch (e) {
- warn('The worker has been disabled.');
- }
- }
- // Either workers are disabled, not supported or have thrown an exception.
- // Thus, we fallback to a faked worker.
- globalScope.PDFJS.disableWorker = true;
- this.setupFakeWorker();
- }
-
- PDFDoc.prototype = {
- setupFakeWorker: function PDFDoc_setupFakeWorker() {
- // If we don't use a worker, just post/sendMessage to the main thread.
- var fakeWorker = {
- postMessage: function PDFDoc_postMessage(obj) {
- fakeWorker.onmessage({data: obj});
- },
- terminate: function PDFDoc_terminate() {}
- };
-
- var messageHandler = new MessageHandler('main', fakeWorker);
- this.setupMessageHandler(messageHandler);
-
- // If the main thread is our worker, setup the handling for the messages
- // the main thread sends to it self.
- WorkerMessageHandler.setup(messageHandler);
- },
-
- setupMessageHandler: function PDFDoc_setupMessageHandler(messageHandler) {
- this.messageHandler = messageHandler;
-
- messageHandler.on('page', function pdfDocPage(data) {
- var pageNum = data.pageNum;
- var page = this.pageCache[pageNum];
- var depFonts = data.depFonts;
-
- page.stats.timeEnd('Page Request');
- page.startRenderingFromOperatorList(data.operatorList, depFonts);
- }, this);
-
- messageHandler.on('obj', function pdfDocObj(data) {
- var id = data[0];
- var type = data[1];
-
- switch (type) {
- case 'JpegStream':
- var imageData = data[2];
- loadJpegStream(id, imageData, this.objs);
- break;
- case 'Image':
- var imageData = data[2];
- this.objs.resolve(id, imageData);
- break;
- case 'Font':
- var name = data[2];
- var file = data[3];
- var properties = data[4];
-
- if (file) {
- // Rewrap the ArrayBuffer in a stream.
- var fontFileDict = new Dict();
- file = new Stream(file, 0, file.length, fontFileDict);
- }
-
- // At this point, only the font object is created but the font is
- // not yet attached to the DOM. This is done in `FontLoader.bind`.
- var font = new Font(name, file, properties);
- this.objs.resolve(id, font);
- break;
- default:
- error('Got unkown object type ' + type);
- }
- }, this);
-
- messageHandler.on('page_error', function pdfDocError(data) {
- var page = this.pageCache[data.pageNum];
- if (page.displayReadyPromise)
- page.displayReadyPromise.reject(data.error);
- else
- error(data.error);
- }, this);
-
- messageHandler.on('text_extracted', function pdfTextExtracted(data) {
- var pageNum = data[0];
- var content = data[1];
- if (pageNum !== this.pageText.length + 1)
- error('pdfTextExtracted: pageIdx and pageText length got to fit');
-
- this.pageText.push(content);
-
- if (this.textExtracted)
- this.textExtracted(pageNum, content);
-
- if (pageNum < this.numPages)
- this.extractTextPage(pageNum + 1);
- }, this);
-
- messageHandler.on('jpeg_decode', function(data, promise) {
- var imageData = data[0];
- var components = data[1];
- if (components != 3 && components != 1)
- error('Only 3 component or 1 component can be returned');
-
- var img = new Image();
- img.onload = (function messageHandler_onloadClosure() {
- var width = img.width;
- var height = img.height;
- var size = width * height;
- var rgbaLength = size * 4;
- var buf = new Uint8Array(size * components);
- var tmpCanvas = createScratchCanvas(width, height);
- var tmpCtx = tmpCanvas.getContext('2d');
- tmpCtx.drawImage(img, 0, 0);
- var data = tmpCtx.getImageData(0, 0, width, height).data;
-
- if (components == 3) {
- for (var i = 0, j = 0; i < rgbaLength; i += 4, j += 3) {
- buf[j] = data[i];
- buf[j + 1] = data[i + 1];
- buf[j + 2] = data[i + 2];
- }
- } else if (components == 1) {
- for (var i = 0, j = 0; i < rgbaLength; i += 4, j++) {
- buf[j] = data[i];
- }
- }
- promise.resolve({ data: buf, width: width, height: height});
- }).bind(this);
- var src = 'data:image/jpeg;base64,' + window.btoa(imageData);
- img.src = src;
- });
-
- setTimeout(function pdfDocFontReadySetTimeout() {
- messageHandler.send('doc', this.data);
- this.workerReadyPromise.resolve(true);
- }.bind(this));
- },
-
- get numPages() {
- return this.pdfModel.numPages;
- },
-
- startRendering: function PDFDoc_startRendering(page) {
- // The worker might not be ready to receive the page request yet.
- this.workerReadyPromise.then(function pdfDocStartRenderingThen() {
- page.stats.time('Page Request');
- this.messageHandler.send('page_request', page.pageNumber + 1);
- }.bind(this));
- },
-
- getPage: function PDFDoc_getPage(n) {
- if (this.pageCache[n])
- return this.pageCache[n];
-
- var page = this.pdfModel.getPage(n);
- // Add a reference to the objects such that Page can forward the reference
- // to the CanvasGraphics and so on.
- page.objs = this.objs;
- page.pdf = this;
- return (this.pageCache[n] = page);
- },
-
- extractTextPage: function PDFDoc_extractTextPage(pageNum) {
- this.messageHandler.send('extract_text', pageNum);
- },
-
- extractText: function PDFDoc_extractText() {
- if (this.startedTextExtraction)
- return;
-
- this.startedTextExtraction = true;
-
- this.workerReadyPromise.then(function pdfDocStartRenderingThen() {
- // Start the text extraction process.
- this.extractTextPage(1);
- }.bind(this));
- },
-
- destroy: function PDFDoc_destroy() {
- if (this.worker)
- this.worker.terminate();
-
- if (this.fontWorker)
- this.fontWorker.terminate();
-
- for (var n in this.pageCache)
- delete this.pageCache[n];
-
- delete this.data;
- delete this.stream;
- delete this.pdf;
- delete this.catalog;
- }
- };
-
- return PDFDoc;
-})();
-
-globalScope.PDFJS.PDFDoc = PDFDoc;
-
+
font = xref.fetchIfRef(font) || fontRes.get(fontName);
assertWellFormed(isDict(font));
- ++self.objIdCounter;
- if (!font.translated) {
++
+ if (!font.loadedName) {
++ ++self.objIdCounter;
font.translated = self.translateFont(font, xref, resources,
dependency);
if (font.translated) {
handler.send('test', data instanceof Uint8Array);
});
- handler.on('doc', function wphSetupDoc(data) {
+ handler.on('GetDocRequest', function wphSetupDoc(data) {
// Create only the model of the PDFDoc, which is enough for
// processing the content of the pdf.
- pdfModel = new PDFDocModel(new Stream(data));
+ pdfModel = new PDFDocument(new Stream(data));
+ var doc = {
+ numPages: pdfModel.numPages,
+ fingerprint: pdfModel.getFingerprint(),
+ destinations: pdfModel.catalog.destinations,
+ outline: pdfModel.catalog.documentOutline,
+ info: pdfModel.getDocumentInfo(),
+ metadata: pdfModel.catalog.metadata
+ };
+ handler.send('GetDoc', {pdfInfo: doc});
});
- handler.on('page_request', function wphSetupPageRequest(pageNum) {
- pageNum = parseInt(pageNum);
+ handler.on('GetPageRequest', function wphSetupGetPage(data) {
+ var pageNumber = data.pageIndex + 1;
+ var pdfPage = pdfModel.getPage(pageNumber);
+ var page = {
+ pageIndex: data.pageIndex,
+ rotate: pdfPage.rotate,
+ ref: pdfPage.ref,
+ view: pdfPage.view
+ };
+ handler.send('GetPage', {pageInfo: page});
+ });
+
+ handler.on('GetAnnotationsRequest', function wphSetupGetAnnotations(data) {
+ var pdfPage = pdfModel.getPage(data.pageIndex + 1);
+ handler.send('GetAnnotations', {
+ pageIndex: data.pageIndex,
+ annotations: pdfPage.getAnnotations()
+ });
+ });
+
+ handler.on('RenderPageRequest', function wphSetupRenderPage(data) {
+ var pageNum = data.pageIndex + 1;
-
// The following code does quite the same as
// Page.prototype.startRendering, but stops at one point and sends the
// result back to the main thread.
depFonts: Object.keys(fonts)
});
}, this);
- handler.on('extract_text', function wphExtractText(pageNum) {
+
- handler.send('text_extracted', [pageNum, textContent]);
++ handler.on('GetTextContent', function wphExtractText(data, promise) {
++ var pageNum = data.pageIndex + 1;
+ var start = Date.now(); // only used for the timing log at the end
+
+ var textContent = '';
+ try {
+ var page = pdfModel.getPage(pageNum);
+ textContent = page.extractTextContent();
++ promise.resolve(textContent);
+ } catch (e) {
+ // Extraction failed for this page: pass the error on through the promise.
++ promise.reject(e);
+ }
+
+ console.log('text indexing: page=%d - time=%dms',
+ pageNum, Date.now() - start);
+ });
}
};
currentScale: kUnknownScale,
currentScaleValue: null,
initialBookmark: document.location.hash.substring(1),
++ startedTextExtraction: false,
++ pageText: [],
setScale: function pdfViewSetScale(val, resetAutoSettings) {
if (val == this.currentScale)
// Setting the default one.
this.parseScale(kDefaultScale, true);
}
-
- this.metadata = null;
- var metadata = pdf.catalog.metadata;
- var info = this.documentInfo = pdf.info;
- var pdfTitle;
-
- if (metadata) {
- this.metadata = metadata = new PDFJS.Metadata(metadata);
-
- if (metadata.has('dc:title'))
- pdfTitle = metadata.get('dc:title');
- }
-
- if (!pdfTitle && info && info['Title'])
- pdfTitle = info['Title'];
-
- if (pdfTitle)
- document.title = pdfTitle + ' - ' + document.title;
-
- pdf.textExtracted = (function pdfTextExtracted(pageIdx, content) {
- this.search();
- }).bind(this);
- },
-
- startTextExtraction: function pdfViewStartTextExtraction(pdf) {
- var searchResults = document.getElementById('searchResults');
- searchResults.textContent = '';
-
- this.pdfDoc.extractText();
},
- if (!this.searchTimer)
- this.searchTimer =
- setTimeout(this.search, SEARCH_TIMEOUT - (now - lastSeach));
-
+ /**
+  * Looks up the content of the 'searchTermsInput' field in the text stored
+  * in PDFView.pageText and fills the 'searchResults' element with one link
+  * per matching page, or '(Not found)' when no page matches.
+  *
+  * Calls are throttled: a call made less than SEARCH_TIMEOUT ms after the
+  * previous run returns immediately and, unless a timer is already pending,
+  * schedules a single deferred run.
+  */
+ search: function pdfViewStartSearch() {
+   // Limit this function to run every <SEARCH_TIMEOUT>ms.
+   var SEARCH_TIMEOUT = 250;
+   var lastSearch = this.lastSearch;
+   var now = Date.now();
+   if (lastSearch && (now - lastSearch) < SEARCH_TIMEOUT) {
- var index = PDFView.pdfDoc.pageText;
++     if (!this.searchTimer) {
++       this.searchTimer = setTimeout(function resumeSearch() {
++           PDFView.search();
++         },
++         SEARCH_TIMEOUT - (now - lastSearch)
++       );
++     }
+     return;
+   }
+   this.searchTimer = null;
+   this.lastSearch = now;
+
+   function bindLink(link, pageNumber) {
+     link.href = '#' + pageNumber;
+     link.onclick = function searchBindLink() {
+       PDFView.page = pageNumber;
+       return false;
+     };
+   }
+
+   // Maps an offset in the normalized text (whitespace and hyphens removed)
+   // back to the offset of the same character in the original text.
+   function originalOffset(text, strippedOffset) {
+     var kept = 0;
+     for (var k = 0, kk = text.length; k < kk; k++) {
+       if (/[\s-]/.test(text.charAt(k)))
+         continue;
+       if (kept === strippedOffset)
+         return k;
+       kept++;
+     }
+     return text.length;
+   }
+
+   var searchResults = document.getElementById('searchResults');
+
+   var searchTermsInput = document.getElementById('searchTermsInput');
+   searchResults.removeAttribute('hidden');
+   searchResults.textContent = '';
+
+   var terms = searchTermsInput.value;
+
+   if (!terms)
+     return;
+
+   // simple search: removing spaces and hyphens, then scanning every
+   // page's text for the normalized terms
+   terms = terms.replace(/[\s-]/g, '').toLowerCase();
+   // Terms made only of spaces/hyphens normalize to '', which would match
+   // every page.
+   if (!terms)
+     return;
+
++   var index = PDFView.pageText;
+   var pageFound = false;
+   for (var i = 0, ii = index.length; i < ii; i++) {
+     var pageText = index[i].replace(/[\s-]/g, '').toLowerCase();
+     var j = pageText.indexOf(terms);
+     if (j < 0)
+       continue;
+
+     var pageNumber = i + 1;
+     // |j| is an offset in the normalized text; translate it before cutting
+     // the sample out of the original text.
+     var textSample = index[i].substr(originalOffset(index[i], j), 50);
+     var link = document.createElement('a');
+     bindLink(link, pageNumber);
+     link.textContent = 'Page ' + pageNumber + ': ' + textSample;
+     searchResults.appendChild(link);
+
+     pageFound = true;
+   }
+   if (!pageFound) {
+     searchResults.textContent = '(Not found)';
+   }
+ },
+
setHash: function pdfViewSetHash(hash) {
if (!hash)
return;
+ /**
+  * Shows the sidebar panel named by |view| and hides the others. Each of
+  * the three panels ('thumbs', 'outline', 'search') is handled by its own
+  * if/else: the matching panel loses its 'hidden' attribute and its switch
+  * button gets 'data-selected'; every other panel is hidden and its button
+  * deselected.
+  *
+  * @param {string} view Compared against 'thumbs', 'outline' and 'search'.
+  */
switchSidebarView: function pdfViewSwitchSidebarView(view) {
var thumbsScrollView = document.getElementById('sidebarScrollView');
- var outlineScrollView = document.getElementById('outlineScrollView');
var thumbsSwitchButton = document.getElementById('thumbsSwitch');
+ // NOTE(review): the removed 'thumbs' case below called
+ // updateThumbViewArea(); no added line calls it - confirm this is intended.
+ if (view == 'thumbs') {
+ thumbsScrollView.removeAttribute('hidden');
+ thumbsSwitchButton.setAttribute('data-selected', true);
+ } else {
+ thumbsScrollView.setAttribute('hidden', 'true');
+ thumbsSwitchButton.removeAttribute('data-selected');
+ }
+
+ var outlineScrollView = document.getElementById('outlineScrollView');
var outlineSwitchButton = document.getElementById('outlineSwitch');
- switch (view) {
- case 'thumbs':
- thumbsScrollView.removeAttribute('hidden');
- outlineScrollView.setAttribute('hidden', 'true');
- thumbsSwitchButton.setAttribute('data-selected', true);
- outlineSwitchButton.removeAttribute('data-selected');
- updateThumbViewArea();
- break;
- case 'outline':
- thumbsScrollView.setAttribute('hidden', 'true');
- outlineScrollView.removeAttribute('hidden');
- thumbsSwitchButton.removeAttribute('data-selected');
- outlineSwitchButton.setAttribute('data-selected', true);
- break;
+ if (view == 'outline') {
+ outlineScrollView.removeAttribute('hidden');
+ outlineSwitchButton.setAttribute('data-selected', true);
+ } else {
+ outlineScrollView.setAttribute('hidden', 'true');
+ outlineSwitchButton.removeAttribute('data-selected');
+ }
+
+ var searchScrollView = document.getElementById('searchScrollView');
+ var searchSwitchButton = document.getElementById('searchSwitch');
+ if (view == 'search') {
+ searchScrollView.removeAttribute('hidden');
+ searchSwitchButton.setAttribute('data-selected', true);
+
+ // Move keyboard focus to the search field when the panel is shown.
+ var searchTermsInput = document.getElementById('searchTermsInput');
+ searchTermsInput.focus();
+
+ // Start text extraction as soon as the search gets displayed.
- this.pdfDoc.extractText();
++ this.extractText();
+ } else {
+ searchScrollView.setAttribute('hidden', 'true');
+ searchSwitchButton.removeAttribute('data-selected');
}
},
++ /**
++  * Starts extracting the text of every page, one page after another, and
++  * stores each result in this.pageText at the page's index. search() is
++  * re-run after each page. Only the first call does any work; later calls
++  * return immediately.
++  */
++ extractText: function pdfViewExtractText() {
++   if (this.startedTextExtraction)
++     return;
++   this.startedTextExtraction = true;
++   var self = this;
++   function extractPageText(pageIndex) {
++     // Covers both the end of the document and a document without pages.
++     if (pageIndex >= self.pages.length)
++       return;
++     function storeAndContinue(textContent) {
++       self.pageText[pageIndex] = textContent;
++       self.search();
++       extractPageText(pageIndex + 1);
++     }
++     self.pages[pageIndex].pdfPage.getTextContent().then(
++       storeAndContinue,
++       function textContentRejected() {
++         // A page whose extraction failed is indexed as empty text so that
++         // the remaining pages still get extracted and search() never
++         // meets a hole in pageText.
++         storeAndContinue('');
++       }
++     );
++   }
++   extractPageText(0);
++ },
++
+ // Toggles the 'pinned' class on the element with id 'sidebar'.
pinSidebar: function pdfViewPinSidebar() {
document.getElementById('sidebar').classList.toggle('pinned');
},