Clean up the encoding/charset/glyphs dance a bit

author Vivien Nicolas <21@vingtetun.org>

Mon, 29 Aug 2011 22:56:02 +0000 (00:56 +0200)

committer Vivien Nicolas <21@vingtetun.org>

Mon, 29 Aug 2011 22:56:02 +0000 (00:56 +0200)
author Vivien Nicolas <21@vingtetun.org>
Mon, 29 Aug 2011 22:56:02 +0000 (00:56 +0200)
committer Vivien Nicolas <21@vingtetun.org>
Mon, 29 Aug 2011 22:56:02 +0000 (00:56 +0200)
diff --git a/fonts.js b/fonts.js

index 2d7ff94f506b38177fef5cec37ecdc8f8398f78c..c40b9f1929821368a643988872c66618fb1b56c6 100755 (executable)
--- a/fonts.js
+++ b/fonts.js
@@ -638,30 +638,28 @@ var Font = (function Font() {
      var ulUnicodeRange3 = 0;
      var ulUnicodeRange4 = 0;
  
-    var charset = properties.charset;
-    if (charset && charset.length) {
-      var firstCharIndex = null;
-      var lastCharIndex = 0;
-
-      for (var i = 0; i < charset.length; i++) {
-        var code = GlyphsUnicode[charset[i]];
-        if (firstCharIndex > code || !firstCharIndex)
-          firstCharIndex = code;
-        if (lastCharIndex < code)
-          lastCharIndex = code;
-
-        var position = getUnicodeRangeFor(code);
-        if (position < 32) {
-          ulUnicodeRange1 |= 1 << position;
-        } else if (position < 64) {
-          ulUnicodeRange2 |= 1 << position - 32;
-        } else if (position < 96) {
-          ulUnicodeRange3 |= 1 << position - 64;
-        } else if (position < 123) {
-          ulUnicodeRange4 |= 1 << position - 96;
-        } else {
-          error('Unicode ranges Bits > 123 are reserved for internal usage');
-        }
+    var firstCharIndex = null;
+    var lastCharIndex = 0;
+
+    var encoding = properties.encoding;
+    for (var index in encoding) {
+      var code = encoding[index];
+      if (firstCharIndex > code || !firstCharIndex)
+        firstCharIndex = code;
+      if (lastCharIndex < code)
+        lastCharIndex = code;
+
+      var position = getUnicodeRangeFor(code);
+      if (position < 32) {
+        ulUnicodeRange1 |= 1 << position;
+      } else if (position < 64) {
+        ulUnicodeRange2 |= 1 << position - 32;
+      } else if (position < 96) {
+        ulUnicodeRange3 |= 1 << position - 64;
+      } else if (position < 123) {
+        ulUnicodeRange4 |= 1 << position - 96;
+      } else {
+        error('Unicode ranges Bits > 123 are reserved for internal usage');
        }
      }
  
@@ -847,7 +845,6 @@ var Font = (function Font() {
          }
  
          var encoding = properties.encoding;
-        var charset = properties.charset;
          for (var i = 0; i < numRecords; i++) {
            var table = records[i];
            font.pos = start + table.offset;
@@ -856,7 +853,9 @@ var Font = (function Font() {
            var length = int16(font.getBytes(2));
            var language = int16(font.getBytes(2));
  
-          if (format == 0) {
+          if (format == 4) {
+            return;
+          } else if (format == 0) {
              // Characters below 0x20 are controls characters that are hardcoded
              // into the platform so if some characters in the font are assigned
              // under this limit they will not be displayed so let's rewrite the
@@ -871,35 +870,15 @@ var Font = (function Font() {
                }
              }
  
-            var rewrite = false;
-            for (var code in encoding) {
-              if (code < 0x20 && encoding[code])
-                rewrite = true;
-
-              if (rewrite)
-                encoding[code] = parseInt(code) + 0x1F;
-            }
-
-            if (rewrite) {
+            if (properties.firstChar < 0x20)
+              var code = 0;
                for (var j = 0; j < glyphs.length; j++) {
+                var glyph = glyphs[j];
                  glyphs[j].unicode += 0x1F;
-              }
+                properties.glyphs[glyph.glyph] = encoding[++code] = glyph.unicode;
              }
-            cmap.data = createCMapTable(glyphs, deltas);
-          } else if (format == 6 && numRecords == 1 && !encoding.empty) {
-            // Format 0 alone is not allowed by the sanitizer so let's rewrite
-            // that to a 3-1-4 Unicode BMP table
-            TODO('Use an other source of informations than ' +
-                 'charset here, it is not reliable');
-            var glyphs = [];
-            for (var j = 0; j < charset.length; j++) {
-              glyphs.push({
-                unicode: GlyphsUnicode[charset[j]] || 0
-              });
-            }
-
-            cmap.data = createCMapTable(glyphs);
-          } else if (format == 6 && numRecords == 1) {
+            return cmap.data = createCMapTable(glyphs, deltas);
+          } else if (format == 6) {
              // Format 6 is a 2-bytes dense mapping, which means the font data
              // lives glue together even if they are pretty far in the unicode
              // table. (This looks weird, so I can have missed something), this
@@ -912,6 +891,8 @@ var Font = (function Font() {
              var min = 0xffff, max = 0;
              for (var j = 0; j < entryCount; j++) {
                var charcode = int16(font.getBytes(2));
+              if (!charcode)
+                continue;
                glyphs.push(charcode);
  
                if (charcode < min)
@@ -939,7 +920,7 @@ var Font = (function Font() {
              var index = firstCode;
              for (var j = start; j <= end; j++)
                encoding[index++] = glyphs[j - firstCode - 1].unicode;
-            cmap.data = createCMapTable(glyphs);
+            return cmap.data = createCMapTable(glyphs);
            }
          }
        };
@@ -1288,10 +1269,6 @@ var Font = (function Font() {
              unicode = charcode;
            }
  
-          // Check if the glyph has already been converted
-          if (!IsNum(unicode))
-            unicode = encoding[unicode] = GlyphsUnicode[unicode.name];
-
            // Handle surrogate pairs
            if (unicode > 0xFFFF) {
              str += String.fromCharCode(unicode & 0xFFFF);
@@ -1715,9 +1692,6 @@ var Type1Parser = function() {
              properties.textMatrix = matrix;
              break;
            case '/Encoding':
-            if (!properties.builtInEncoding)
-              break;
-
              var size = parseInt(getToken());
              getToken(); // read in 'array'
  
@@ -1726,9 +1700,12 @@ var Type1Parser = function() {
                if (token == 'dup') {
                  var index = parseInt(getToken());
                  var glyph = getToken();
-                properties.encoding[index] = GlyphsUnicode[glyph];
+              
+                if (!properties.differences[j]) {
+                  var code = GlyphsUnicode[glyph];
+                  properties.glyphs[glyph] = properties.encoding[index] = code;
+                }
                  getToken(); // read the in 'put'
-                j = index;
                }
              }
              break;
@@ -1903,7 +1880,7 @@ CFF.prototype = {
            missings.push(glyph.glyph);
        } else {
          charstrings.push({
-          glyph: glyph,
+          glyph: glyph.glyph,
            unicode: unicode,
            charstring: glyph.data,
            width: glyph.width,
@@ -2079,7 +2056,7 @@ CFF.prototype = {
  
          var count = glyphs.length;
          for (var i = 0; i < count; i++) {
-          var index = CFFStrings.indexOf(charstrings[i].glyph.glyph);
+          var index = CFFStrings.indexOf(charstrings[i].glyph);
            // Some characters like asterikmath && circlecopyrt are
            // missing from the original strings, for the moment let's
            // map them to .notdef and see later if it cause any
@@ -2176,7 +2153,6 @@ var Type2CFF = (function() {
        var stringIndex = this.parseIndex(dictIndex.endPos);
        var gsubrIndex = this.parseIndex(stringIndex.endPos);
  
-
        var strings = this.getStrings(stringIndex);
  
        var baseDict = this.parseDict(dictIndex.get(0));
@@ -2219,7 +2195,7 @@ var Type2CFF = (function() {
        var charstrings = [];
        for (var i = 0, ii = charsets.length; i < ii; ++i) {
          var charName = charsets[i];
-        var charCode = GlyphsUnicode[charName];
+        var charCode = properties.glyphs[charName];
          if (charCode) {
            var width = widths[charCode] || defaultWidth;
            charstrings.push({unicode: charCode, width: width, gid: i});
diff --git a/pdf.js b/pdf.js

index e7095b69220719fcf6ff7b0c330bfee7cb60e051..32e13817f53fc4a7c56b65a46104f0d0991b2efc 100644 (file)
--- a/pdf.js
+++ b/pdf.js
@@ -4199,8 +4199,6 @@ var PartialEvaluator = (function() {
  
        var builtInEncoding = false;
        var encodingMap = {};
-      var glyphMap = {};
-      var charset = [];
        if (compositeFont) {
          // Special CIDFont support
          // XXX only CIDFontType2 supported for now
@@ -4242,69 +4240,61 @@ var PartialEvaluator = (function() {
          if (fontDict.has('Encoding')) {
            var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
            if (IsDict(encoding)) {
-            // Build a map of between codes and glyphs
-            // Load the base encoding
              var baseName = encoding.get('BaseEncoding');
-            if (baseName) {
+            if (baseName)
                baseEncoding = Encodings[baseName.name].slice();
-            }
  
              // Load the differences between the base and original
              var differences = encoding.get('Differences');
              var index = 0;
              for (var j = 0; j < differences.length; j++) {
                var data = differences[j];
-              if (IsNum(data)) {
+              if (IsNum(data))
                  index = data;
-              } else {
+              else
                  diffEncoding[index++] = data.name;
-              }
              }
            } else if (IsName(encoding)) {
              baseEncoding = Encodings[encoding.name].slice();
+          } else {
+            error("Encoding is not a Name nor a Dict");
            }
          }
  
+        var fontType = subType.name; 
          if (!baseEncoding) {
-          var type = subType.name;
-          if (type == 'TrueType') {
-            baseEncoding = Encodings.WinAnsiEncoding.slice();
-          } else if (type == 'Type1') {
-            baseEncoding = Encodings.StandardEncoding.slice();
-            if (!diffEncoding.length)
-              builtInEncoding = true;
-          } else {
-            error('Unknown type of font');
+          switch (fontType) {
+            case 'TrueType':
+              baseEncoding = Encodings.WinAnsiEncoding.slice();
+              break;
+            case 'Type1':
+              baseEncoding = Encodings.StandardEncoding.slice();
+              break;
+            default:
+              warn('Unknown type of font: ' + fontType);
+              break;
            }
          }
  
+        // firstChar and width are required
+        // (except for 14 standard fonts)
+        var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')) || 0;
+        var lastChar = xref.fetchIfRef(fontDict.get('LastChar')) || 0;
+        var widths = xref.fetchIfRef(fontDict.get('Widths')) || [];
+
          // merge in the differences
-        var length = baseEncoding.length > diffEncoding.length ?
-                     baseEncoding.length : diffEncoding.length;
-        for (var i = 0, ii = length; i < ii; ++i) {
-          var diffGlyph = diffEncoding[i];
-          var baseGlyph = baseEncoding[i];
-          if (diffGlyph) {
-            glyphMap[i] = diffGlyph;
-            encodingMap[i] = GlyphsUnicode[diffGlyph];
-          } else if (baseGlyph) {
-            glyphMap[i] = baseGlyph;
-            encodingMap[i] = GlyphsUnicode[baseGlyph];
-          }
+        var glyphsMap = {};
+        for (var i = firstChar; i <= lastChar; i++) {
+          var glyph = diffEncoding[i] || baseEncoding[i];
+          if (glyph)
+            glyphsMap[glyph] = encodingMap[i] = GlyphsUnicode[glyph];
          }
  
-        if (fontDict.has('ToUnicode')) {
-          encodingMap['empty'] = true;
-          var glyphsMap = {};
-          for (var p in glyphMap)
-            glyphsMap[glyphMap[p]] = encodingMap[p];
-
+        if (fontDict.has('ToUnicode') && differences) {
            var cmapObj = xref.fetchIfRef(fontDict.get('ToUnicode'));
            if (IsName(cmapObj)) {
              error('ToUnicode file cmap translation not implemented');
            } else if (IsStream(cmapObj)) {
-            var firstChar = xref.fetchIfRef(fontDict.get('FirstChar'));
-
              var tokens = [];
              var token = '';
  
@@ -4334,6 +4324,8 @@ var PartialEvaluator = (function() {
                        var startRange = parseInt('0x' + tokens[j]);
                        var endRange = parseInt('0x' + tokens[j + 1]);
                        var code = parseInt('0x' + tokens[j + 2]);
+                      for (var k = startRange; k < endRange; k++)
+                        encodingMap[k] = code++;
                      }
                      break;
  
@@ -4360,15 +4352,6 @@ var PartialEvaluator = (function() {
              }
            }
          }
-
-        // firstChar and width are required
-        // (except for 14 standard fonts)
-        var firstChar = xref.fetchIfRef(fontDict.get('FirstChar'));
-        var widths = xref.fetchIfRef(fontDict.get('Widths')) || [];
-        for (var j = 0; j < widths.length; j++) {
-          if (widths[j])
-            charset.push(glyphMap[j + firstChar]);
-        }
        }
  
        if (!fd) {
@@ -4396,7 +4379,6 @@ var PartialEvaluator = (function() {
        }
  
        var descriptor = xref.fetch(fd);
-
        var fontName = fontDict.get('Name');
        if (!fontName)
          fontName = xref.fetchIfRef(descriptor.get('FontName'));;
@@ -4414,14 +4396,6 @@ var PartialEvaluator = (function() {
          }
        }
  
-      if (descriptor.has('CharSet')) {
-        // Get the font charset if any (meaningful only in Type 1)
-        charset = descriptor.get('CharSet');
-        assertWellFormed(IsString(charset), 'invalid charset');
-        charset = charset.split('/');
-        charset.shift();
-      }
-
        var widths = fontDict.get('Widths');
        if (widths) {
          var glyphWidths = {};
@@ -4435,9 +4409,8 @@ var PartialEvaluator = (function() {
          subtype: fileType,
          widths: glyphWidths,
          encoding: encodingMap,
+        differences: diffEncoding,
          glyphs: glyphsMap || GlyphsUnicode,
-        builtInEncoding: builtInEncoding,
-        charset: charset,
          firstChar: fontDict.get('FirstChar'),
          lastChar: fontDict.get('LastChar'),
          bbox: descriptor.get('FontBBox'),
author	Vivien Nicolas <21@vingtetun.org>
	Mon, 29 Aug 2011 22:56:02 +0000 (00:56 +0200)
committer	Vivien Nicolas <21@vingtetun.org>
	Mon, 29 Aug 2011 22:56:02 +0000 (00:56 +0200)