Getting additional encoding information for cmap from the CFF data (#641)

author notmasteryet <async.processingjs@yahoo.com>

Mon, 10 Oct 2011 03:40:49 +0000 (22:40 -0500)

committer notmasteryet <async.processingjs@yahoo.com>

Mon, 10 Oct 2011 03:40:49 +0000 (22:40 -0500)
author notmasteryet <async.processingjs@yahoo.com>
Mon, 10 Oct 2011 03:40:49 +0000 (22:40 -0500)
committer notmasteryet <async.processingjs@yahoo.com>
Mon, 10 Oct 2011 03:40:49 +0000 (22:40 -0500)
diff --git a/fonts.js b/fonts.js

index 2ab3a90b76eee461e938d5b55ffb41994d6fa752..4f0d55d74e9432a832c380fb5774af22b365eb88 100644 (file)
--- a/fonts.js
+++ b/fonts.js
@@ -2488,7 +2488,7 @@ var Type2CFF = (function type2CFF() {
        var charStrings = this.parseIndex(topDict.CharStrings);
        var charset = this.parseCharsets(topDict.charset,
                                         charStrings.length, strings);
-      var hasSupplement = this.parseEncoding(topDict.Encoding, properties,
+      var encoding = this.parseEncoding(topDict.Encoding, properties,
                                               strings, charset);
  
        // The font sanitizer does not support CFF encoding with a
@@ -2496,8 +2496,8 @@ var Type2CFF = (function type2CFF() {
        // between gid to glyph, let's overwrite what is declared in
        // the top dictionary to let the sanitizer think the font use
        // StandardEncoding, that's a lie but that's ok.
-      if (hasSupplement)
-        bytes[topDict.Encoding] = 0;
+      if (encoding.hasSupplement)
+        bytes[topDict.Encoding] &= 0x7F;
  
        // The CFF specification state that the 'dotsection' command
        // (12, 0) is deprecated and treated as a no-op, but all Type2
@@ -2528,7 +2528,7 @@ var Type2CFF = (function type2CFF() {
  
        // charstrings contains info about glyphs (one element per glyph
        // containing mappings for {unicode, width})
-      var charstrings = this.getCharStrings(charset, charStrings,
+      var charstrings = this.getCharStrings(charset, encoding.encoding,
                                              privateDict, this.properties);
  
        // create the mapping between charstring and glyph id
@@ -2545,49 +2545,82 @@ var Type2CFF = (function type2CFF() {
        return data;
      },
  
-    getCharStrings: function cff_charstrings(charsets, charStrings,
+    getCharStrings: function cff_charstrings(charsets, encoding,
                                               privateDict, properties) {
        var defaultWidth = privateDict['defaultWidthX'];
        var charstrings = [];
        var differences = properties.differences;
-      var index = properties.firstChar || 0;
        for (var i = 1; i < charsets.length; i++) {
-        var code = -1;
+        var inDifferences;
          var glyph = charsets[i];
+        var code;
          for (var j = 0; j < differences.length; j++) {
            if (differences[j] == glyph) {
-            index = j;
-            code = differences.indexOf(glyph);
+            code = j;
+            inDifferences = true;
              break;
            }
          }
+        if (!inDifferences) {
+          var code = properties.firstChar + i;
+          for (var s in encoding) {
+            if (encoding[s] == i) {
+              code = s | 0;
+              break;
+            }
+          }
+        }
+
+        if (properties.encoding[code] &&
+            properties.encoding[code].inDifferences)
+            continue;
  
-        var mapping =
-          properties.glyphs[glyph] || properties.glyphs[index] || {};
-        if (code == -1)
-          index = code = mapping.unicode || index;
+        var mapping = properties.glyphs[code] || properties.glyphs[glyph] || {};
+        var unicode = mapping.unicode || code;
  
-        if (code <= 0x1f || (code >= 127 && code <= 255))
-          code += kCmapGlyphOffset;
+        if (unicode <= 0x1f || (unicode >= 127 && unicode <= 255))
+          unicode += kCmapGlyphOffset;
  
-        var width = mapping.width;
-        properties.glyphs[glyph] = properties.encoding[index] = {
-          unicode: code,
-          width: isNum(width) ? width : defaultWidth
+        var width = isNum(mapping.width) ? mapping.width : defaultWidth;
+        properties.encoding[code] = {
+          unicode: unicode,
+          width: width,
+          inDifferences: inDifferences
          };
  
          charstrings.push({
-          unicode: code,
+          unicode: unicode,
            width: width,
            gid: i
          });
-        index++;
        }
  
        // sort the array by the unicode value
        charstrings.sort(function type2CFFGetCharStringsSort(a, b) {
          return a.unicode - b.unicode;
        });
+
+      // remove duplicates -- they might appear during selection:
+      //   properties.glyphs[code] || properties.glyphs[glyph]
+      // TODO make more deterministic
+      var nextUnusedUnicode = kCmapGlyphOffset + 0x0020;
+      var lastUnicode = charstrings[0].unicode, wasModified = false;
+      for (var i = 1; i < charstrings.length; ++i) {
+        if (lastUnicode != charstrings[i].unicode) {
+          lastUnicode = charstrings[i].unicode;
+          continue;
+        }
+        // duplicate found -- changing the unicode for previous one
+        charstrings[i - 1].unicode = nextUnusedUnicode++;
+        wasModified = true;
+      }
+      if (!wasModified)
+        return charstrings;
+
+      // sort the array by the unicode value (again)
+      charstrings.sort(function type2CFFGetCharStringsSort(a, b) {
+        return a.unicode - b.unicode;
+      });
        return charstrings;
      },
  
@@ -2595,6 +2628,10 @@ var Type2CFF = (function type2CFF() {
                                                charset) {
        var encoding = {};
        var bytes = this.bytes;
+      var result = {
+        encoding: encoding,
+        hasSupplement: false
+      };
  
        function readSupplement() {
          var supplementsCount = bytes[pos++];
@@ -2621,11 +2658,6 @@ var Type2CFF = (function type2CFF() {
              var glyphsCount = bytes[pos++];
              for (var i = 1; i <= glyphsCount; i++)
                encoding[bytes[pos++]] = i;
-
-            if (format & 0x80) {
-              readSupplement();
-              return true;
-            }
              break;
  
            case 1:
@@ -2637,19 +2669,18 @@ var Type2CFF = (function type2CFF() {
                for (var j = start; j <= start + count; j++)
                  encoding[j] = gid++;
              }
-
-            if (format & 0x80) {
-              readSupplement();
-              return true;
-            }
              break;
  
            default:
              error('Unknow encoding format: ' + format + ' in CFF');
              break;
          }
+        if (format & 0x80) {
+          readSupplement();
+          result.hasSupplement = true;
+        }
        }
-      return false;
+      return result;
      },
  
      parseCharsets: function cff_parsecharsets(pos, length, strings) {
diff --git a/pdf.js b/pdf.js

index 847ed2ff48ddcaf2d8bb004619d5a2a05830a75a..82345b17bad9300dded1839610554569a940cdac 100644 (file)
--- a/pdf.js
+++ b/pdf.js
@@ -3558,6 +3558,12 @@ var Page = (function pagePage() {
        var self = this;
        var stats = self.stats;
        stats.compile = stats.fonts = stats.render = 0;
+      if (!this.content) {
+        setTimeout(function norenderingSetTimeout() {
+          if (continuation) continuation(null);
+        });
+        return;
+      }
  
        var gfx = new CanvasGraphics(canvasCtx);
        var fonts = [];
@@ -4610,6 +4616,8 @@ var PartialEvaluator = (function partialEvaluator() {
  
          if (replaceGlyph || !glyphs[glyph])
              glyphs[glyph] = map[i];
+        if (replaceGlyph || !glyphs[index])
+            glyphs[index] = map[i];
  
          // If there is no file, the character mapping can't be modified
          // but this is unlikely that there is any standard encoding with
author	notmasteryet <async.processingjs@yahoo.com>
	Mon, 10 Oct 2011 03:40:49 +0000 (22:40 -0500)
committer	notmasteryet <async.processingjs@yahoo.com>
	Mon, 10 Oct 2011 03:40:49 +0000 (22:40 -0500)