git.parisson.com Git - pdf.js.git/commitdiff
Merge branch 'master' of git://github.com/mozilla/pdf.js.git into textsearch-1
author notmasteryet <async.processingjs@yahoo.com>
Thu, 15 Dec 2011 01:37:21 +0000 (19:37 -0600)
committer notmasteryet <async.processingjs@yahoo.com>
Thu, 15 Dec 2011 01:37:21 +0000 (19:37 -0600)
Conflicts:
src/core.js

1  2 
src/core.js
src/evaluator.js
src/fonts.js
src/worker.js
web/viewer.js

diff --cc src/core.js
index 6a932f1276014d4da426b5373261a36bc73b9aff,a6effd2bd5acdf1777a045e13b19a03dc071eee5..b498401d1ec7b7e20857eb1fd66644beeb1e8595
@@@ -646,12 -618,41 +650,47 @@@ var PDFDoc = (function PDFDocClosure() 
            throw data.error;
        }, this);
  
-         var index = data.index;
 +      messageHandler.on('text_extracted', function pdfDocError(data) {
++        var index = data[0];
 +        if (this.textExtracted)
 +          this.textExtracted(index);
 +      }, this);
 +
+       messageHandler.on('jpeg_decode', function(data, promise) {
+         var imageData = data[0];
+         var components = data[1];
+         if (components != 3 && components != 1)
+           error('Only 3 component or 1 component can be returned');
+         var img = new Image();
+         img.onload = (function jpegImageLoaderOnload() {
+           var width = img.width;
+           var height = img.height;
+           var size = width * height;
+           var rgbaLength = size * 4;
+           var buf = new Uint8Array(size * components);
+           var tmpCanvas = new ScratchCanvas(width, height);
+           var tmpCtx = tmpCanvas.getContext('2d');
+           tmpCtx.drawImage(img, 0, 0);
+           var data = tmpCtx.getImageData(0, 0, width, height).data;
+           if (components == 3) {
+             for (var i = 0, j = 0; i < rgbaLength; i += 4, j += 3) {
+               buf[j] = data[i];
+               buf[j + 1] = data[i + 1];
+               buf[j + 2] = data[i + 2];
+             }
+           } else if (components == 1) {
+             for (var i = 0, j = 0; i < rgbaLength; i += 4, j++) {
+               buf[j] = data[i];
+             }
+           }
+           promise.resolve({ data: buf, width: width, height: height});
+         }).bind(this);
+         var src = 'data:image/jpeg;base64,' + window.btoa(imageData);
+         img.src = src;
+       });
        setTimeout(function pdfDocFontReadySetTimeout() {
          messageHandler.send('doc', this.data);
          this.workerReadyPromise.resolve(true);
Simple merge
diff --cc src/fonts.js
index 3c65a1a07ea4a3238a9b91fe1b273e4f1c7d084e,83ce4abaabd4488dd8d16cfec079c46ece79204c..6bbbaf0147e125ae6e1d3a0cb29ba5d5a1a664ec
@@@ -2139,8 -2101,39 +2143,39 @@@ var Font = (function FontClosure() 
        return rule;
      },
  
+     get spaceWidth() {
+       // trying to estimate space character width
+       var possibleSpaceReplacements = ['space', 'minus', 'one', 'i'];
+       var width;
+       for (var i = 0, ii = possibleSpaceReplacements.length; i < ii; i++) {
+         var glyphName = possibleSpaceReplacements[i];
+         // if possible, getting width by glyph name
+         if (glyphName in this.widths) {
+           width = this.widths[glyphName];
+           break;
+         }
+         var glyphUnicode = GlyphsUnicode[glyphName];
+         // finding the charcode via unicodeToCID map
+         var charcode = 0;
+         if (this.composite)
+           charcode = this.unicodeToCID[glyphUnicode];
+         // ... via toUnicode map
+         if (!charcode && 'toUnicode' in this)
+           charcode = this.toUnicode.indexOf(glyphUnicode);
+         // setting it to unicode if negative or undefined
+         if (!(charcode > 0))
+           charcode = glyphUnicode;
+         // trying to get width via charcode
+         width = this.widths[charcode];
+         if (width)
+           break; // the non-zero width found
+       }
+       width = (width || this.defaultWidth) * this.widthMultiplier;
+       return shadow(this, 'spaceWidth', width);
+     },
      charToGlyph: function fonts_charToGlyph(charcode) {
 -      var unicode, width, codeIRQueue;
 +      var fontChar, width, codeIRQueue;
  
        var width = this.widths[charcode];
  
diff --cc src/worker.js
index 3cc91d07e39c083de81807851334ab9ff09c00fb,c18de65ad05808fd6a705ed0116cda98b12a1605..dea6339d1765f78b7537b255839e9d83165fcbda
@@@ -160,39 -195,6 +195,39 @@@ var WorkerMessageHandler = 
  
        handler.send('font_ready', [objId, obj]);
      });
-           handler.send('text_extracted', { index: index });
 +
 +    handler.on('extract_text', function wphExtractText() {
 +      var numPages = pdfDoc.numPages;
 +      var index = [];
 +      var start = Date.now();
 +
 +      function indexPage(pageNum) {
 +        if (pageNum > numPages) {
 +          console.log('text indexing=: time=%dms', Date.now() - start);
 +
++          handler.send('text_extracted', [index]);
 +          return;
 +        }
 +
 +        var textContent = '';
 +        try {
 +          var page = pdfDoc.getPage(pageNum);
 +          textContent = page.extractTextContent();
 +        } catch (e) {
 +          // Skip errored pages
 +        }
 +
 +        index.push(textContent);
 +
 +        // processing one page, interrupting thread to process
 +        // other requests
 +        setTimeout(function extractTextNextPage() {
 +          indexPage(pageNum + 1);
 +        }, 0);
 +      }
 +
 +      indexPage(1);
 +    });
    }
  };
  
diff --cc web/viewer.js
Simple merge