git.parisson.com Git - pdf.js.git/commitdiff
Add more glue between glyph mapping and code mapping
author Vivien Nicolas <21@vingtetun.org>
Thu, 8 Sep 2011 01:16:33 +0000 (03:16 +0200)
committer Vivien Nicolas <21@vingtetun.org>
Thu, 8 Sep 2011 01:16:33 +0000 (03:16 +0200)
fonts.js
pdf.js

diff --git a/fonts.js b/fonts.js
index 659c5157efa242ed31014c1ca26bd942aec77000..bfdbb0b4ad41c05623defe397c0d1b1cb43d575d 100755 (executable)
--- a/fonts.js
+++ b/fonts.js
@@ -711,7 +711,7 @@ var Font = (function Font() {
 
     var encoding = properties.encoding;
     for (var index in encoding) {
-      var code = encoding[index];
+      var code = encoding[index].unicode;
       if (firstCharIndex > code || !firstCharIndex)
         firstCharIndex = code;
       if (lastCharIndex < code)
@@ -970,15 +970,9 @@ var Font = (function Font() {
               if (index) {
                 deltas.push(index);
 
-                var code = encoding[index];
-                for (var glyph in properties.glyphs) {
-                  if (properties.glyphs[glyph] == code) 
-                    break;
-                }
-
                 var unicode = j + kCmapGlyphOffset;
-                properties.glyphs[glyph] = encoding[j] = unicode;
-                glyphs.push({ glyph: glyph, unicode: unicode });
+                encoding[j].unicode = unicode;
+                glyphs.push({ unicode: unicode });
               }
             }
             
@@ -1023,8 +1017,10 @@ var Font = (function Font() {
             var start = denseRange[0];
             var end = denseRange[1];
             var index = firstCode;
-            for (var j = start; j <= end; j++)
-              encoding[index++] = glyphs[j - firstCode - 1].unicode;
+            for (var j = start; j <= end; j++) {
+              var code = j - firstCode - 1;
+              encoding[index++] = { unicode: glyphs[code].unicode };
+            }
             return cmap.data = createCMapTable(glyphs);
           }
         }
@@ -1118,30 +1114,28 @@ var Font = (function Font() {
         // U+00AD (soft hyphen) is not drawn.
         // So, offset all the glyphs by 0xFF to avoid these cases and use
         // the encoding to map incoming characters to the new glyph positions
+        if (!cmap) {
+          cmap = {
+            tag: 'cmap',
+            data: null
+          };
+          tables.push(cmap);
+        }
 
-        var glyphs = [];
         var encoding = properties.encoding;
-
-        for (var i = 1; i < numGlyphs; i++)
-          glyphs.push({ unicode: i + kCmapGlyphOffset });
-
-        if ('undefined' == typeof(encoding[0])) {
+        if (!encoding[0]) {
           // the font is directly characters to glyphs with no encoding
           // so create an identity encoding
           for (i = 0; i < numGlyphs; i++)
-            encoding[i] = i + kCmapGlyphOffset;
+            encoding[i] = { unicode: i + kCmapGlyphOffset };
         } else {
           for (var code in encoding)
-            encoding[code] += kCmapGlyphOffset;
+            encoding[code].unicode += kCmapGlyphOffset;
         }
 
-        if (!cmap) {
-          cmap = {
-            tag: 'cmap',
-            data: null
-          };
-          tables.push(cmap);
-        }
+        var glyphs = [];
+        for (var i = 1; i < numGlyphs; i++)
+          glyphs.push({ unicode: i + kCmapGlyphOffset });
         cmap.data = createCMapTable(glyphs);
       } else {
         replaceCMapTable(cmap, font, properties);
@@ -1361,14 +1355,14 @@ var Font = (function Font() {
                                        // loop should never end on the last byte
         for (var i = 0; i < length; i++) {
           var charcode = int16([chars.charCodeAt(i++), chars.charCodeAt(i)]);
-          var unicode = encoding[charcode];
+          var unicode = encoding[charcode].unicode;
           str += String.fromCharCode(unicode);
         }
       }
       else {
         for (var i = 0; i < chars.length; ++i) {
           var charcode = chars.charCodeAt(i);
-          var unicode = encoding[charcode];
+          var unicode = encoding[charcode].unicode;
           if ('undefined' == typeof(unicode)) {
             warn('Unencoded charcode ' + charcode);
             unicode = charcode;
@@ -1376,7 +1370,7 @@ var Font = (function Font() {
 
           // Check if the glyph has already been converted
           if (!IsNum(unicode))
-            unicode = encoding[charcode] = this.glyphs[unicode];
+            unicode = encoding[charcode].unicode = this.glyphs[unicode].unicode;
 
           // Handle surrogate pairs
           if (unicode > 0xFFFF) {
@@ -1830,8 +1824,8 @@ var Type1Parser = function() {
                 var glyph = getToken();
               
                 if ('undefined' == typeof(properties.differences[index])) {
-                  properties.encoding[index] = glyph;
-                  properties.glyphs[glyph] = GlyphsUnicode[glyph] || index;
+                  var mapping = { unicode: GlyphsUnicode[glyph] || j };
+                  properties.glyphs[glyph] = properties.encoding[index] = mapping;
                 }
                 getToken(); // read the in 'put'
               }
@@ -2000,14 +1994,14 @@ CFF.prototype = {
 
     for (var i = 0; i < glyphs.length; i++) {
       var glyph = glyphs[i];
-      var unicode = properties.glyphs[glyph.glyph];
-      if (!unicode) {
+      var mapping = properties.glyphs[glyph.glyph];
+      if (!mapping) {
         if (glyph.glyph != '.notdef')
           missings.push(glyph.glyph);
       } else {
         charstrings.push({
           glyph: glyph.glyph,
-          unicode: unicode,
+          unicode: mapping.unicode,
           charstring: glyph.data,
           width: glyph.width,
           lsb: glyph.lsb
@@ -2340,17 +2334,24 @@ var Type2CFF = (function() {
           }
         }
 
-        if (code == -1)
-          index = code = properties.glyphs[glyph] || index;
+        if (code == -1) {
+          var mapping = properties.glyphs[glyph] || {};
+          index = code = mapping.unicode || index;
+        }
 
         var width = widths[code] || defaultWidth;
         if (code <= 0x1f || (code >= 127 && code <= 255))
           code += kCmapGlyphOffset;
 
-        properties.encoding[index] = code;
+        properties.glyphs[glyph] = properties.encoding[index] = {
+          unicode: code,
+          width: width
+        };
+
         charstrings.push({
           unicode: code,
-          width: width, gid: i
+          width: width,
+          gid: i
         });
         index++;
       }
diff --git a/pdf.js b/pdf.js
index 5b0558940a5c3ade25e4c81b58e5c71c41935779..73575a4f306844af4fe70a4e56ac0b3196202f4b 100644 (file)
--- a/pdf.js
+++ b/pdf.js
@@ -4194,13 +4194,19 @@ var PartialEvaluator = (function() {
           var glyphsData = glyphsStream.getBytes(0);
 
           // Glyph ids are big-endian 2-byte values
-          // Set this to 0 to verify the font has an encoding.
           var encoding = properties.encoding;
-          encoding[0] = 0;
+
+          // Set encoding 0 to later verify the font has an encoding
+          encoding[0] = { unicode: 0 };
           for (var j = 0; j < glyphsData.length; j++) {
             var glyphID = (glyphsData[j++] << 8) | glyphsData[j];
-            if (glyphID != 0)
-              encoding[j >> 1] = glyphID;
+            if (glyphID == 0)
+              continue;
+
+            encoding[j >> 1] = {
+              unicode: glyphID,
+              width: 0
+            };
           }
         } else if (type == 'CIDFontType0') {
           var encoding = xref.fetchIfRef(dict.get('Encoding'));
@@ -4269,7 +4275,10 @@ var PartialEvaluator = (function() {
         var glyph = differences[i] || baseEncoding[i];
         if (glyph) {
           var index = GlyphsUnicode[glyph] || i;
-          glyphs[glyph] = map[i] = index;
+          glyphs[glyph] = map[i] = {
+            unicode: index,
+            width: properties.widths[i - firstChar] || properties.defaultWidth
+          };
 
           // If there is no file, the character mapping can't be modified
           // but this is unlikely that there is any standard encoding with
@@ -4278,7 +4287,7 @@ var PartialEvaluator = (function() {
             continue;
 
           if (index <= 0x1f || (index >= 127 && index <= 255))
-            glyphs[glyph] = map[i] += kCmapGlyphOffset;
+            map[i].unicode += kCmapGlyphOffset;
         }
       }
 
@@ -4316,7 +4325,10 @@ var PartialEvaluator = (function() {
                     var endRange = tokens[j + 1];
                     var code = tokens[j + 2];
                     while (startRange < endRange) {
-                      map[startRange] = code++;
+                      map[startRange] = {
+                        unicode: code++,
+                        width: 0
+                      }
                       ++startRange;
                     }
                   }
@@ -4327,7 +4339,10 @@ var PartialEvaluator = (function() {
                   for (var j = 0; j < tokens.length; j += 2) {
                     var index = tokens[j];
                     var code = tokens[j + 1];
-                    map[index] = code;
+                    map[index] = {
+                      unicode: code,
+                      width: 0
+                    };
                   }
                   break;
 
@@ -4478,19 +4493,18 @@ var PartialEvaluator = (function() {
         descent: descriptor.get('Descent'),
         xHeight: descriptor.get('XHeight'),
         capHeight: descriptor.get('CapHeight'),
+        defaultWidth: descriptor.get('MissingWidth') || 0,
         flags: descriptor.get('Flags'),
         italicAngle: descriptor.get('ItalicAngle'),
         differences: [],
-        widths: [],
+        widths: (function() {
+          var glyphWidths = {};
+          for (var i = 0; i <= widths.length; i++)
+            glyphWidths[firstChar++] = widths[i];
+          return glyphWidths;
+        })(),
         encoding: {}
       };
-
-      // XXX Encoding and Glyphs should point to the same object so it will
-      // be hard to be out of sync. The object could contains the unicode and
-      // the width of the glyph.
-      for (var i = 0; i <= widths.length; i++)
-        properties.widths[firstChar++] = widths[i];
-
       properties.glyphs = this.extractEncoding(dict, xref, properties);
 
       return {