throw data.error;
}, this);
- var index = data.index;
+ messageHandler.on('text_extracted', function pdfDocError(data) { // 'text_extracted' listener; NOTE(review): the name pdfDocError does not describe this handler - looks like a copy-paste leftover
++ var index = data[0]; // the payload is read as an array; its first element is forwarded below
+ if (this.textExtracted) // the textExtracted callback is optional - skipped when not set
+ this.textExtracted(index);
+ }, this); // `this` is handed to on() as an extra argument - presumably the scope the listener runs with; confirm against messageHandler
+
+ messageHandler.on('jpeg_decode', function(data, promise) { // 'jpeg_decode' listener: loads the JPEG into an Image, reads its pixels back from a canvas and resolves `promise` with them
+ var imageData = data[0]; // fed to window.btoa below - presumably a binary string of JPEG bytes; confirm against the sender
+ var components = data[1]; // number of bytes to output per pixel; only 3 and 1 are handled
+ if (components != 3 && components != 1)
+ error('Only 3 component or 1 component can be returned'); // NOTE(review): error() is defined elsewhere - presumably it throws; otherwise execution continues past this check
+
+ var img = new Image();
+ img.onload = (function jpegImageLoaderOnload() { // runs once the image has loaded and its dimensions are available
+ var width = img.width;
+ var height = img.height;
+ var size = width * height; // pixel count
+ var rgbaLength = size * 4; // length of the canvas pixel array: 4 bytes (RGBA) per pixel
+ var buf = new Uint8Array(size * components); // output buffer: `components` bytes per pixel
+ var tmpCanvas = new ScratchCanvas(width, height); // ScratchCanvas is defined elsewhere; used here as an off-screen drawing surface
+ var tmpCtx = tmpCanvas.getContext('2d');
+ tmpCtx.drawImage(img, 0, 0); // paint the decoded image so its pixels can be read back
+ var data = tmpCtx.getImageData(0, 0, width, height).data; // note: this local `data` shadows the listener's `data` parameter
+
+ if (components == 3) { // copy the first three bytes of every pixel, dropping the fourth (alpha)
+ for (var i = 0, j = 0; i < rgbaLength; i += 4, j += 3) {
+ buf[j] = data[i];
+ buf[j + 1] = data[i + 1];
+ buf[j + 2] = data[i + 2];
+ }
+ } else if (components == 1) { // keep only the first byte of every pixel
+ for (var i = 0, j = 0; i < rgbaLength; i += 4, j++) {
+ buf[j] = data[i];
+ }
+ }
+ promise.resolve({ data: buf, width: width, height: height}); // hand the packed pixels and the image dimensions back to the requester
+ }).bind(this); // NOTE(review): the callback body never references `this`, so this bind appears to have no effect
+ var src = 'data:image/jpeg;base64,' + window.btoa(imageData); // build a base64 data URL from the image bytes
+ img.src = src; // assigning src starts the load; NOTE(review): no onerror handler is set in this block, so `promise` is never resolved here if loading fails
+ });
+
setTimeout(function pdfDocFontReadySetTimeout() {
messageHandler.send('doc', this.data);
this.workerReadyPromise.resolve(true);
return rule;
},
+ get spaceWidth() { // getter returning an estimated width for the space character, scaled by widthMultiplier
+ // Estimate the space character width from the first candidate glyph that has a usable width.
+ var possibleSpaceReplacements = ['space', 'minus', 'one', 'i']; // candidates, tried in this order
+ var width;
+ for (var i = 0, ii = possibleSpaceReplacements.length; i < ii; i++) {
+ var glyphName = possibleSpaceReplacements[i];
+ // If widths has an entry keyed by the glyph name, use it and stop (even when that entry is 0).
+ if (glyphName in this.widths) {
+ width = this.widths[glyphName];
+ break;
+ }
+ var glyphUnicode = GlyphsUnicode[glyphName]; // GlyphsUnicode is defined elsewhere; looked up by glyph name
+ // Otherwise resolve a charcode: first through the unicodeToCID map (composite fonts only)
+ var charcode = 0;
+ if (this.composite)
+ charcode = this.unicodeToCID[glyphUnicode];
+ // ... then through the toUnicode map when the first lookup gave nothing (indexOf returns -1 when absent)
+ if (!charcode && 'toUnicode' in this)
+ charcode = this.toUnicode.indexOf(glyphUnicode);
+ // Fall back to the unicode value itself when charcode is negative, zero or undefined
+ if (!(charcode > 0))
+ charcode = glyphUnicode;
+ // Look the width up by charcode
+ width = this.widths[charcode];
+ if (width)
+ break; // a non-zero width was found
+ }
+ width = (width || this.defaultWidth) * this.widthMultiplier; // use defaultWidth when no non-zero width was found, then apply the multiplier
+ return shadow(this, 'spaceWidth', width); // shadow() is defined elsewhere - presumably it stores the value on the object so the estimate is computed once; confirm
+ },
+
charToGlyph: function fonts_charToGlyph(charcode) {
- var unicode, width, codeIRQueue;
+ var fontChar, width, codeIRQueue;
var width = this.widths[charcode];
handler.send('font_ready', [objId, obj]);
});
- handler.send('text_extracted', { index: index });
+
+ handler.on('extract_text', function wphExtractText() { // 'extract_text' listener: collects the text content of every page, one page per timer tick, then sends 'text_extracted'
+ var numPages = pdfDoc.numPages;
+ var index = []; // one entry per page, pushed in page order
+ var start = Date.now(); // start time, used only for the elapsed-time log below
+
+ function indexPage(pageNum) { // handles page `pageNum`, then schedules the following page
+ if (pageNum > numPages) { // past the last page: report the elapsed time and send the result
+ console.log('text indexing=: time=%dms', Date.now() - start); // NOTE(review): the '=' after "indexing" in this message looks like a typo
+
++ handler.send('text_extracted', [index]); // wrapped in an array; the 'text_extracted' listener reads data[0]
+ return;
+ }
+
+ var textContent = ''; // stays empty if extraction fails, so the page still gets an entry in `index`
+ try {
+ var page = pdfDoc.getPage(pageNum);
+ textContent = page.extractTextContent();
+ } catch (e) {
+ // Skip errored pages: the exception is discarded and an empty string is recorded for this page
+ }
+
+ index.push(textContent);
+
+ // One page is processed per call; the next page is deferred with
+ // setTimeout so other pending requests can be handled in between
+ setTimeout(function extractTextNextPage() {
+ indexPage(pageNum + 1);
+ }, 0);
+ }
+
+ indexPage(1); // start with page number 1
+ });
}
};