Removing adaptUnicode; making cmap equal to ToUnicode tables

author notmasteryet <async.processingjs@yahoo.com>

Mon, 28 Nov 2011 02:43:23 +0000 (20:43 -0600)

committer notmasteryet <async.processingjs@yahoo.com>

Mon, 28 Nov 2011 02:43:23 +0000 (20:43 -0600)
author notmasteryet <async.processingjs@yahoo.com>
Mon, 28 Nov 2011 02:43:23 +0000 (20:43 -0600)
committer notmasteryet <async.processingjs@yahoo.com>
Mon, 28 Nov 2011 02:43:23 +0000 (20:43 -0600)
diff --git a/src/evaluator.js b/src/evaluator.js

index 03fce2d9aa6544ba3c21e4e3e91cd189700cf3e1..3e687c72d181a90aa6f6fd9a2295f87d78dcff6d 100644 (file)
--- a/src/evaluator.js
+++ b/src/evaluator.js
@@ -555,9 +555,21 @@ var PartialEvaluator = (function partialEvaluator() {
                    var startRange = tokens[j];
                    var endRange = tokens[j + 1];
                    var code = tokens[j + 2];
-                  while (startRange <= endRange) {
-                    charToUnicode[startRange] = code++;
-                    ++startRange;
+                  if (code == 0xFFFF) {
+                    // CMap is broken, assuming code == startRange
+                    code = startRange;
+                  }
+                  if (isArray(code)) {
+                    var codeindex = 0;
+                    while (startRange <= endRange) {
+                      charToUnicode[startRange] = code[codeindex++];
+                      ++startRange;
+                    }
+                  } else {
+                    while (startRange <= endRange) {
+                      charToUnicode[startRange] = code++;
+                      ++startRange;
+                    }
                    }
                  }
                  break;
diff --git a/src/fonts.js b/src/fonts.js

index d028a9786ab0b91ca386438ae292fd1a1d35f5c1..fb9bb9f0cdd07c54b87837dd78eb05278d73d811 100644 (file)
--- a/src/fonts.js
+++ b/src/fonts.js
@@ -719,20 +719,10 @@ function getUnicodeRangeFor(value) {
    return -1;
  }
  
-function adaptUnicode(unicode) {
-  return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) ?
-    unicode + kCmapGlyphOffset : unicode;
-}
-
-function isAdaptedUnicode(unicode) {
-  return unicode >= kCmapGlyphOffset &&
-    unicode < kCmapGlyphOffset + kSizeOfGlyphArea;
-}
-
  function isSpecialUnicode(unicode) {
    return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) ||
-    unicode >= kCmapGlyphOffset &&
-    unicode < kCmapGlyphOffset + kSizeOfGlyphArea;
+    (unicode >= kCmapGlyphOffset &&
+    unicode < kCmapGlyphOffset + kSizeOfGlyphArea);
  }
  
  /**
@@ -965,15 +955,15 @@ var Font = (function Font() {
      var ranges = [];
      for (var n = 0; n < length; ) {
        var start = codes[n].unicode;
-      var startCode = codes[n].code;
+      var codeIndices = [codes[n].code];
        ++n;
        var end = start;
        while (n < length && end + 1 == codes[n].unicode) {
+        codeIndices.push(codes[n].code);
          ++end;
          ++n;
        }
-      var endCode = codes[n - 1].code;
-      ranges.push([start, end, startCode, endCode]);
+      ranges.push([start, end, codeIndices]);
      }
  
      return ranges;
@@ -1016,17 +1006,16 @@ var Font = (function Font() {
          idDeltas += string16(0);
          idRangeOffsets += string16(offset);
  
-        var startCode = range[2];
-        var endCode = range[3];
-        for (var j = startCode; j <= endCode; ++j)
-          glyphsIds += string16(deltas[j]);
+        var codes = range[2];
+        for (var j = 0, jj = codes.length; j < jj; ++j)
+          glyphsIds += string16(deltas[codes[j]]);
        }
      } else {
        for (var i = 0; i < segCount - 1; i++) {
          var range = ranges[i];
          var start = range[0];
          var end = range[1];
-        var startCode = range[2];
+        var startCode = range[2][0];
  
          startCount += string16(start);
          endCount += string16(end);
@@ -1303,7 +1292,7 @@ var Font = (function Font() {
            properties.baseEncoding = encoding;
        }
  
-      function replaceCMapTable(cmap, font, properties) {
+      function readCMapTable(cmap, font) {
          var start = (font.start ? font.start : 0) + cmap.offset;
          font.pos = start;
  
@@ -1320,7 +1309,7 @@ var Font = (function Font() {
          }
  
          // Check that table are sorted by platformID then encodingID,
-        records.sort(function fontReplaceCMapTableSort(a, b) {
+        records.sort(function fontReadCMapTableSort(a, b) {
            return ((a.platformID << 16) + a.encodingID) -
                   ((b.platformID << 16) + b.encodingID);
          });
@@ -1375,16 +1364,15 @@ var Font = (function Font() {
              for (var j = 0; j < 256; j++) {
                var index = font.getByte();
                if (index) {
-                var unicode = adaptUnicode(j);
-                glyphs.push({ unicode: unicode, code: j });
+                glyphs.push({ unicode: j, code: j });
                  ids.push(index);
                }
              }
-
-            properties.hasShortCmap = true;
-
-            createGlyphNameMap(glyphs, ids, properties);
-            return cmap.data = createCMapTable(glyphs, ids);
+            return {
+              glyphs: glyphs,
+              ids: ids,
+              hasShortCmap: true
+            };
            } else if (format == 4) {
              // re-creating the table in format 4 since the encoding
              // might be changed
@@ -1436,17 +1424,18 @@ var Font = (function Font() {
                  var glyphCode = offsetIndex < 0 ? j :
                    offsets[offsetIndex + j - start];
                  glyphCode = (glyphCode + delta) & 0xFFFF;
-                if (glyphCode == 0 || isAdaptedUnicode(j))
+                if (glyphCode == 0)
                    continue;
  
-                var unicode = adaptUnicode(j);
-                glyphs.push({ unicode: unicode, code: j });
+                glyphs.push({ unicode: j, code: j });
                  ids.push(glyphCode);
                }
              }
  
-            createGlyphNameMap(glyphs, ids, properties);
-            return cmap.data = createCMapTable(glyphs, ids);
+            return {
+              glyphs: glyphs,
+              ids: ids
+            };
            } else if (format == 6) {
              // Format 6 is a 2-bytes dense mapping, which means the font data
              // lives glue together even if they are pretty far in the unicode
@@ -1461,19 +1450,18 @@ var Font = (function Font() {
              for (var j = 0; j < entryCount; j++) {
                var glyphCode = int16(font.getBytes(2));
                var code = firstCode + j;
-              if (isAdaptedUnicode(glyphCode))
-                continue;
  
-              var unicode = adaptUnicode(code);
-              glyphs.push({ unicode: unicode, code: code });
+              glyphs.push({ unicode: code, code: code });
                ids.push(glyphCode);
              }
  
-            createGlyphNameMap(glyphs, ids, properties);
-            return cmap.data = createCMapTable(glyphs, ids);
+            return {
+              glyphs: glyphs,
+              ids: ids
+            };
            }
          }
-        return cmap.data;
+        error('Unsupported cmap table format');
        };
  
        function sanitizeMetrics(font, header, metrics, numGlyphs) {
@@ -1712,17 +1700,60 @@ var Font = (function Font() {
            tables.push(cmap);
          }
  
-        var glyphs = [];
+        var glyphs = [], ids = [];
+        var usedUnicodes = [], unusedUnicode = kCmapGlyphOffset;
+        var cidToGidMap = properties.cidToGidMap;
          for (i = 1; i < numGlyphs; i++) {
-          if (isAdaptedUnicode(i))
-            continue;
-
-          glyphs.push({ unicode: adaptUnicode(i) });
+          var cid = cidToGidMap ? cidToGidMap.indexOf(i) : i;
+          var unicode = this.toUnicode[cid];
+          if (!unicode || isSpecialUnicode(unicode) ||
+              unicode in usedUnicodes) {
+            // overriding the special symbols mapping
+            while (unusedUnicode in usedUnicodes)
+              unusedUnicode++;
+            this.toUnicode[cid] = unicode = unusedUnicode++;
+            if (unusedUnicode >= kCmapGlyphOffset + kSizeOfGlyphArea) {
+              // overflow of the user defined symbols range
+              // using symbols that are a little bit lower than this range
+              unusedUnicode = kCmapGlyphOffset - numGlyphs;
+            }
+          }
+          usedUnicodes[unicode] = true;
+          glyphs.push({ unicode: unicode, code: cid });
+          ids.push(i);
          }
-        cmap.data = createCMapTable(glyphs);
+        cmap.data = createCMapTable(glyphs, ids);
        } else {
-        replaceCMapTable(cmap, font, properties);
+        var cmapTable = readCMapTable(cmap, font);
+        var glyphs = cmapTable.glyphs;
+        var ids = cmapTable.ids;
+        var hasShortCmap = !!cmapTable.hasShortCmap;
+        var toUnicode = this.toUnicode;
+
+        if (hasShortCmap && toUnicode) {
+          // checking if cmap is just identity map
+          var isIdentity = true;
+          for (var i = 0, ii = glyphs.length; i < ii; i++) {
+            if (glyphs[i].unicode != i + 1) {
+              isIdentity = false;
+              break;
+            }
+          }
+          // if it is, replacing with meaningful toUnicode values
+          if (isIdentity) {
+            for (var i = 0, ii = glyphs.length; i < ii; i++) {
+              var unicode = toUnicode[i + 1] || i + 1;
+              glyphs[i].unicode = unicode;
+            }
+            this.useToUnicode = true;
+          }
+        }
+        properties.hasShortCmap = hasShortCmap;
+
+        createGlyphNameMap(glyphs, ids, properties);
          this.glyphNameMap = properties.glyphNameMap;
+
+        cmap.data = createCMapTable(glyphs, ids);
        }
  
        // Rewrite the 'post' table if needed
@@ -1812,6 +1843,14 @@ var Font = (function Font() {
          }
          properties.baseEncoding = encoding;
        }
+      if (properties.subtype == 'CIDFontType0C') {
+        var toUnicode = [];
+        for (var i = 0; i < charstrings.length; ++i) {
+          var charstring = charstrings[i];
+          toUnicode[charstring.code] = charstring.unicode;
+        }
+        this.toUnicode = toUnicode;
+      }
  
        var fields = {
          // PostScript Font Program
@@ -1872,8 +1911,11 @@ var Font = (function Font() {
          // Horizontal metrics
          'hmtx': (function fontFieldsHmtx() {
            var hmtx = '\x00\x00\x00\x00'; // Fake .notdef
-          for (var i = 0, ii = charstrings.length; i < ii; i++)
-            hmtx += string16(charstrings[i].width) + string16(0);
+          for (var i = 0, ii = charstrings.length; i < ii; i++) {
+            var charstring = charstrings[i];
+            var width = 'width' in charstring ? charstring.width : 0;
+            hmtx += string16(width) + string16(0);
+          }
            return stringToArray(hmtx);
          })(),
  
@@ -1903,20 +1945,22 @@ var Font = (function Font() {
      },
  
      rebuildToUnicode: function font_rebuildToUnicode(properties) {
+      var firstChar = properties.firstChar, lastChar = properties.lastChar;
        var map = [];
        if (properties.composite) {
-        for (var i = properties.firstChar, ii = properties.lastChar; i <= ii; i++) {
+        var isIdentityMap = this.cidToUnicode.length == 0;
+        for (var i = firstChar, ii = lastChar; i <= ii; i++) {
            // TODO missing map the character according font's CMap
            var cid = i;
-          map[i] = this.cidToUnicode[cid];
+          map[i] = isIdentityMap ? cid : this.cidToUnicode[cid];
          }
        } else {
-        for (var i = properties.firstChar, ii = properties.lastChar; i <= ii; i++) {
+        for (var i = firstChar, ii = lastChar; i <= ii; i++) {
            var glyph = properties.differences[i];
            if (!glyph)
              glyph = properties.baseEncoding[i];
            if (!!glyph && (glyph in GlyphsUnicode))
-            map[i] = GlyphsUnicode[glyph]
+            map[i] = GlyphsUnicode[glyph];
          }
        }
        this.toUnicode = map;
@@ -1926,16 +1970,12 @@ var Font = (function Font() {
      },
  
      loadCidToUnicode: function font_loadCidToUnicode(properties) {
-      if (properties.cidToGidMap) {
-        this.cidToUnicode = properties.cidToGidMap;
-        return;
-      }
-
        if (!properties.cidSystemInfo)
          return;
  
-      var cidToUnicodeMap = [];
+      var cidToUnicodeMap = [], unicodeToCIDMap = [];
        this.cidToUnicode = cidToUnicodeMap;
+      this.unicodeToCID = unicodeToCIDMap;
  
        var cidSystemInfo = properties.cidSystemInfo;
        var cidToUnicode;
@@ -1947,28 +1987,34 @@ var Font = (function Font() {
        if (!cidToUnicode)
          return; // identity encoding
  
-      var glyph = 1, i, j, k, ii;
+      var cid = 1, i, j, k, ii;
        for (i = 0, ii = cidToUnicode.length; i < ii; ++i) {
          var unicode = cidToUnicode[i];
          if (isArray(unicode)) {
            var length = unicode.length;
-          for (j = 0; j < length; j++)
-            cidToUnicodeMap[unicode[j]] = glyph;
-          glyph++;
+          for (j = 0; j < length; j++) {
+            cidToUnicodeMap[cid] = unicode[j];
+            unicodeToCIDMap[unicode[j]] = cid;
+          }
+          cid++;
          } else if (typeof unicode === 'object') {
            var fillLength = unicode.f;
            if (fillLength) {
              k = unicode.c;
              for (j = 0; j < fillLength; ++j) {
-              cidToUnicodeMap[k] = glyph++;
+              cidToUnicodeMap[cid] = k;
+              unicodeToCIDMap[k] = cid;
+              cid++;
                k++;
              }
            } else
-            glyph += unicode.s;
+            cid += unicode.s;
          } else if (unicode) {
-          cidToUnicodeMap[unicode] = glyph++;
+          cidToUnicodeMap[cid] = unicode;
+          unicodeToCIDMap[unicode] = cid;
+          cid++;
          } else
-          glyph++;
+          cid++;
        }
      },
  
@@ -2008,19 +2054,19 @@ var Font = (function Font() {
        switch (this.type) {
          case 'CIDFontType0':
            if (this.noUnicodeAdaptation) {
-            width = this.widths[this.cidToUnicode[charcode]];
+            width = this.widths[this.unicodeToCID[charcode] || charcode];
              unicode = charcode;
              break;
            }
-          unicode = adaptUnicode(this.cidToUnicode[charcode] || charcode);
+          unicode = this.toUnicode[charcode] || charcode;
            break;
          case 'CIDFontType2':
            if (this.noUnicodeAdaptation) {
-            width = this.widths[this.cidToUnicode[charcode]];
+            width = this.widths[this.unicodeToCID[charcode] || charcode];
              unicode = charcode;
              break;
            }
-          unicode = adaptUnicode(this.cidToUnicode[charcode] || charcode);
+          unicode = this.toUnicode[charcode] || charcode;
            break;
          case 'Type1':
            var glyphName = this.differences[charcode] || this.encoding[charcode];
@@ -2031,7 +2077,7 @@ var Font = (function Font() {
              break;
            }
            unicode = this.glyphNameMap[glyphName] ||
-            adaptUnicode(GlyphsUnicode[glyphName] || charcode);
+            GlyphsUnicode[glyphName] || charcode;
            break;
          case 'Type3':
            var glyphName = this.differences[charcode] || this.encoding[charcode];
@@ -2049,16 +2095,16 @@ var Font = (function Font() {
              break;
            }
            if (!this.hasEncoding) {
-            unicode = adaptUnicode(charcode);
+            unicode = this.useToUnicode ? this.toUnicode[charcode] : charcode;
              break;
            }
-          if (this.hasShortCmap) {
+          if (this.hasShortCmap && false) {
              var j = Encodings.MacRomanEncoding.indexOf(glyphName);
-            unicode = j >= 0 && !isSpecialUnicode(j) ? j :
+            unicode = j >= 0 ? j :
                this.glyphNameMap[glyphName];
            } else {
              unicode = glyphName in GlyphsUnicode ?
-              adaptUnicode(GlyphsUnicode[glyphName]) :
+              GlyphsUnicode[glyphName] :
                this.glyphNameMap[glyphName];
            }
            break;
@@ -2068,12 +2114,8 @@ var Font = (function Font() {
        }
  
        var unicodeChars = this.toUnicode ? this.toUnicode[charcode] : charcode;
-      if (typeof unicodeChars === 'number') {
-        unicodeChars = (unicodeChars >= 0x10000) ?
-            String.fromCharCode(0xD800 | ((unicodeChars - 0x10000) >> 10),
-            0xDC00 | (unicodeChars & 0x3FF)) : String.fromCharCode(unicodeChars);
-        // TODO we probably don't need convert high/low surrogate... keeping for now
-      }
+      if (typeof unicodeChars === 'number')
+        unicodeChars = String.fromCharCode(unicodeChars);
  
        return {
          fontChar: String.fromCharCode(unicode),
@@ -2790,22 +2832,13 @@ CFF.prototype = {
    getOrderedCharStrings: function cff_getOrderedCharStrings(glyphs,
                                                              properties) {
      var charstrings = [];
-    var reverseMapping = {};
-    var encoding = properties.baseEncoding;
      var i, length, glyphName;
-    for (i = 0, length = encoding.length; i < length; ++i) {
-      glyphName = encoding[i];
-      if (!glyphName || isSpecialUnicode(i))
-        continue;
-      reverseMapping[glyphName] = i;
-    }
-    reverseMapping['.notdef'] = 0;
      var unusedUnicode = kCmapGlyphOffset;
      for (i = 0, length = glyphs.length; i < length; i++) {
        var item = glyphs[i];
        var glyphName = item.glyph;
-      var unicode = glyphName in reverseMapping ?
-        reverseMapping[glyphName] : unusedUnicode++;
+      var unicode = glyphName in GlyphsUnicode ?
+        GlyphsUnicode[glyphName] : unusedUnicode++;
        charstrings.push({
          glyph: glyphName,
          unicode: unicode,
@@ -3092,16 +3125,14 @@ var Type2CFF = (function type2CFF() {
        }
  
        var charStrings = this.parseIndex(topDict.CharStrings);
-      var charset = this.parseCharsets(topDict.charset,
-                                       charStrings.length, strings);
-      var encoding = this.parseEncoding(topDict.Encoding, properties,
-                                             strings, charset);
  
        var charset, encoding;
        var isCIDFont = properties.subtype == 'CIDFontType0C';
        if (isCIDFont) {
-        charset = [];
-        charset.length = charStrings.length;
+        charset = ['.notdef'];
+        for (var i = 1, ii = charStrings.length; i < ii; ++i)
+          charset.push('glyph' + i);
+
          encoding = this.parseCidMap(topDict.charset,
                                      charStrings.length);
        } else {
@@ -3170,38 +3201,44 @@ var Type2CFF = (function type2CFF() {
        var charstrings = [];
        var unicodeUsed = [];
        var unassignedUnicodeItems = [];
+      var inverseEncoding = [];
+      for (var charcode in encoding)
+        inverseEncoding[encoding[charcode]] = charcode | 0;
        for (var i = 0, ii = charsets.length; i < ii; i++) {
          var glyph = charsets[i];
-        var encodingFound = false;
-        for (var charcode in encoding) {
-          if (encoding[charcode] == i) {
-            var code = charcode | 0;
-            charstrings.push({
-              unicode: adaptUnicode(code),
-              code: code,
-              gid: i,
-              glyph: glyph
-            });
-            unicodeUsed[code] = true;
-            encodingFound = true;
-            break;
-          }
+        if (glyph == '.notdef') {
+          charstrings.push({
+            unicode: 0,
+            code: 0,
+            gid: i,
+            glyph: glyph
+          });
+          continue;
          }
-        if (!encodingFound) {
+        var code = inverseEncoding[i];
+        if (!code || isSpecialUnicode(code)) {
            unassignedUnicodeItems.push(i);
+          continue;
          }
+        charstrings.push({
+          unicode: code,
+          code: code,
+          gid: i,
+          glyph: glyph
+        });
+        unicodeUsed[code] = true;
        }
  
-      var nextUnusedUnicode = 0x21;
+      var nextUnusedUnicode = kCmapGlyphOffset;
        for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; ++j) {
          var i = unassignedUnicodeItems[j];
          // giving unicode value anyway
-        while (unicodeUsed[nextUnusedUnicode])
+        while (nextUnusedUnicode in unicodeUsed)
            nextUnusedUnicode++;
-        var code = nextUnusedUnicode++;
+        var unicode = nextUnusedUnicode++;
          charstrings.push({
-          unicode: adaptUnicode(code),
-          code: code,
+          unicode: unicode,
+          code: inverseEncoding[i] || 0,
            gid: i,
            glyph: charsets[i]
          });
author	notmasteryet <async.processingjs@yahoo.com>
	Mon, 28 Nov 2011 02:43:23 +0000 (20:43 -0600)
committer	notmasteryet <async.processingjs@yahoo.com>
	Mon, 28 Nov 2011 02:43:23 +0000 (20:43 -0600)
src/evaluator.js		patch \| blob \| history
src/fonts.js		patch \| blob \| history