this.fontsLoading = {};
this.workerReadyPromise = new Promise('workerReady');
+ this.pageText = [];
+ this.startedTextExtraction = false;
+
// If worker support isn't explicitly disabled and the browser has worker
// support, create a new web worker and test if it/the browser fulfills
// all requirements to run parts of pdf.js in a web worker.
WorkerMessageHandler.setup(messageHandler);
},
-
setupMessageHandler: function PDFDoc_setupMessageHandler(messageHandler) {
this.messageHandler = messageHandler;
}, this);
// Handles the 'text_extracted' message. data[0] is the page number and
// data[1] is the extracted content for that page.
messageHandler.on('text_extracted', function pdfTextExtracted(data) {
- var index = data[0];
+ var pageNum = data[0];
+ var content = data[1];
// Content is appended to pageText one page at a time, so the incoming
// page number must be exactly one past the number of entries stored so far.
+ if (pageNum !== this.pageText.length + 1)
+ error('pdfTextExtracted: pageIdx and pageText length got to fit');
+
+ this.pageText.push(content);
+
// textExtracted is an optional callback; it is only invoked when set.
if (this.textExtracted)
- this.textExtracted(index);
+ this.textExtracted(pageNum, content);
+
// Request the following page until numPages has been reached.
+ if (pageNum < this.numPages)
+ this.extractTextPage(pageNum + 1);
}, this);
messageHandler.on('jpeg_decode', function(data, promise) {
return (this.pageCache[n] = page);
},
// Sends an 'extract_text' message carrying pageNum through
// this.messageHandler, requesting the text of that single page.
+ extractTextPage: function PDFDoc_extractTextPage(pageNum) {
+ this.messageHandler.send('extract_text', pageNum);
+ },
+
// Starts text extraction. Only the first call has an effect: later calls
// return early because startedTextExtraction has already been set.
extractText: function PDFDoc_extractText() {
+ if (this.startedTextExtraction)
+ return;
+
+ this.startedTextExtraction = true;
+
// The first page request is deferred until workerReadyPromise resolves.
this.workerReadyPromise.then(function pdfDocStartRenderingThen() {
- this.messageHandler.send('extract_text');
+ // Start the text extraction process.
+ this.extractTextPage(1);
}.bind(this));
},
handler.on('page_request', function wphSetupPageRequest(pageNum) {
pageNum = parseInt(pageNum);
-
// The following code does quite the same as
// Page.prototype.startRendering, but stops at one point and sends the
// result back to the main thread.
});
}, this);
// Handles an 'extract_text' request for a single page: extracts the text
// content of page pageNum from pdfModel and replies with a
// 'text_extracted' message whose payload is [pageNum, textContent].
- handler.on('extract_text', function wphExtractText() {
- var numPages = pdfModel.numPages;
- var index = [];
+ handler.on('extract_text', function wphExtractText(pageNum) {
var start = Date.now();
- function indexPage(pageNum) {
- if (pageNum > numPages) {
- console.log('text indexing: time=%dms', Date.now() - start);
-
- handler.send('text_extracted', [index]);
- return;
- }
-
- var textContent = '';
- // try {
- var page = pdfModel.getPage(pageNum);
- textContent = page.extractTextContent();
- // } catch (e) {
- // // Skip errored pages
- // }
-
- index.push(textContent);
-
- // processing one page, interrupting thread to process
- // other requests
- setTimeout(function extractTextNextPage() {
- indexPage(pageNum + 1);
- }, 0);
// textContent keeps its '' default when getPage or extractTextContent
// throws, so a reply is still sent for the page.
+ var textContent = '';
+ try {
+ var page = pdfModel.getPage(pageNum);
+ textContent = page.extractTextContent();
+ } catch (e) {
+ // Skip errored pages
}
- indexPage(1);
// Log the elapsed time for this page, measured from `start`.
+ console.log('text indexing: page=%d - time=%dms',
+ pageNum, Date.now() - start);
+ handler.send('text_extracted', [pageNum, textContent]);
});
}
};
var pdf;
try {
- pdf = new PDFJS.PDFDoc(data);
+ this.pdfDoc = pdf = new PDFJS.PDFDoc(data);
} catch (e) {
this.error('An error occurred while reading the PDF.', e);
}
if (pdfTitle)
document.title = pdfTitle + ' - ' + document.title;
-
- // loosing pdf reference here, starting text indexing in 500ms
- setTimeout((function loadStartTextExtraction() {
- this.startTextExtraction(pdf);
- }).bind(this), 500);
- delete PDFView.extractedText;
},
// Clears the 'searchResults' element, installs a textExtracted callback on
// pdf, starts extraction via pdf.extractText() and stores pdf as
// this.pdfDoc.
startTextExtraction: function pdfViewStartTextExtraction(pdf) {
var searchResults = document.getElementById('searchResults');
searchResults.textContent = '';
- pdf.textExtracted = function pdfTextExtracted(index) {
- PDFView.extractedText = index;
- };
// The callback ignores its arguments and simply re-runs search() each
// time it fires.
+ pdf.textExtracted = (function pdfTextExtracted(pageIdx, content) {
+ this.search();
+ }).bind(this);
pdf.extractText();
+
+ this.pdfDoc = pdf;
},
search: function pdfViewStartSearch() {
}
var searchResults = document.getElementById('searchResults');
- if (!('extractedText' in PDFView)) {
- // not indexed yet, repeat in 1 second
- searchResults.textContent = 'Searching...';
- setTimeout(this.search.bind(this), 1000);
- return;
- }
var searchTermsInput = document.getElementById('searchTermsInput');
searchResults.removeAttribute('hidden');
searchResults.textContent = '';
var terms = searchTermsInput.value;
+
+ if (!terms)
+ return;
+
// simple search: removing spaces and hyphens, then scanning every
terms = terms.replace(/\s-/g, '').toLowerCase();
- var index = PDFView.extractedText;
+ var index = PDFView.pdfDoc.pageText;
var pageFound = false;
for (var i = 0, ii = index.length; i < ii; i++) {
var pageText = index[i].replace(/\s-/g, '').toLowerCase();
var searchTermsInput = document.getElementById('searchTermsInput');
searchTermsInput.focus();
+
+ // Start text extraction as soon as the search gets displayed.
+ this.pdfDoc.extractText();
} else {
searchScrollView.setAttribute('hidden', 'true');
searchSwitchButton.removeAttribute('data-selected');