Second pass CIDFont support - make Arial Unicode in OpenOffice PDF

author Adil Allawi <adil@diwan.com>

Mon, 11 Jul 2011 16:41:47 +0000 (17:41 +0100)

committer Adil Allawi <adil@diwan.com>

Mon, 11 Jul 2011 16:41:55 +0000 (17:41 +0100)
author Adil Allawi <adil@diwan.com>
Mon, 11 Jul 2011 16:41:47 +0000 (17:41 +0100)
committer Adil Allawi <adil@diwan.com>
Mon, 11 Jul 2011 16:41:55 +0000 (17:41 +0100)
diff --git a/fonts.js b/fonts.js

index 1f0a6a21122356fc5e08b073be659293e63a0545..2728afd894b62ffdaf9ce343cf0d7b6d58ac8b84 100755 (executable)
--- a/fonts.js
+++ b/fonts.js
@@ -404,12 +404,21 @@ var Font = (function() {
          data = this.checkAndRepair(name, file, properties);
          break;
  
+      case 'Type0':
+        //this is a Truetype font
+        this.mimetype = 'font/opentype';
+
+        // Repair the TrueType file if it would be considered damaged from the
+        // point of view of the sanitizer
+        data = this.checkAndRepair(name, file, properties);
+        break;
+
        default:
          warn('Font ' + properties.type + ' is not supported');
          break;
      }
      this.data = data;
-
+    this.type = properties.type; //use the type to test if the string is single or multi-byte
      this.id = Fonts.registerFont(name, data, properties);
      this.loadedName = 'pdfFont' + this.id;
    };
@@ -856,8 +865,26 @@ var Font = (function() {
            data: stringToArray(createOS2Table(properties))
          });
  
-        // Replace the old CMAP table with a shiny new one
-        replaceCMapTable(cmap, font, properties);
+        if (!cmap) {
+          var glyphs = [];
+          var charset = properties.charset;
+          for (var i=1; i < charset.length; i++) {
+            if (charset.indexOf(i) != -1) {
+              glyphs.push({
+                unicode: charset.indexOf(i)
+              });
+            } else {
+              break;
+            }
+          }
+          tables.push({
+            tag: 'cmap',
+            data: createCMapTable(glyphs)
+          })
+        } else {
+          // Replace the old CMAP table with a shiny new one
+          replaceCMapTable(cmap, font, properties);          
+        }
  
          // Rewrite the 'post' table if needed
          if (!post) {
@@ -1110,44 +1137,63 @@ var Font = (function() {
  
      charsToUnicode: function fonts_chars2Unicode(chars) {
        var charsCache = this.charsCache;
-
+      var str;
+      
        // if we translated this string before, just grab it from the cache
        if (charsCache) {
-        var str = charsCache[chars];
+        str = charsCache[chars];
          if (str)
            return str;
        }
-
-      // translate the string using the font's encoding
-      var encoding = this.encoding;
-      if (!encoding)
-        return chars;
-
+      
        // lazily create the translation cache
        if (!charsCache)
          charsCache = this.charsCache = Object.create(null);
-
-      str = '';
-      for (var i = 0; i < chars.length; ++i) {
-        var charcode = chars.charCodeAt(i);
-        var unicode = encoding[charcode];
-        if ('undefined' == typeof(unicode)) {
-          // FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf
-          // at the moment, for unknown reasons.
-          warn('Unencoded charcode '+ charcode);
-          unicode = charcode;
+      
+      if (this.type == "Type0") {
+        // The string must be converted from single bytes to multi-byte codes; assume two bytes per code for now
+        str = '';
+        var multiByteStr = "";
+        var length = chars.length;
+        for (var i = 0; i < length; i++) {
+          var byte1 = chars.charCodeAt(i++) & 0xFF;
+          var byte2;
+          if (i == length)
+            byte2 = 0;
+          else
+            byte2 = chars.charCodeAt(i) & 0xFF;
+          multiByteStr += String.fromCharCode((byte1<<8) | byte2);
          }
-
-        // Check if the glyph has already been converted
-        if (!IsNum(unicode))
-          unicode = encoding[unicode] = GlyphsUnicode[unicode.name];
-
-        // Handle surrogate pairs
-        if (unicode > 0xFFFF) {
-          str += String.fromCharCode(unicode & 0xFFFF);
-          unicode >>= 16;
+        str = multiByteStr;
+      }
+      else {
+        // translate the string using the font's encoding
+        var encoding = this.encoding;
+        if (!encoding)
+          return chars;
+  
+        str = '';
+        for (var i = 0; i < chars.length; ++i) {
+          var charcode = chars.charCodeAt(i);
+          var unicode = encoding[charcode];
+          if ('undefined' == typeof(unicode)) {
+            // FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf
+            // at the moment, for unknown reasons.
+            warn('Unencoded charcode '+ charcode);
+            unicode = charcode;
+          }
+  
+          // Check if the glyph has already been converted
+          if (!IsNum(unicode))
+            unicode = encoding[unicode] = GlyphsUnicode[unicode.name];
+  
+          // Handle surrogate pairs
+          if (unicode > 0xFFFF) {
+            str += String.fromCharCode(unicode & 0xFFFF);
+            unicode >>= 16;
+          }
+          str += String.fromCharCode(unicode);
          }
-        str += String.fromCharCode(unicode);
        }
  
        // Enter the translated string into the cache
diff --git a/pdf.js b/pdf.js

index 9dddd8210e2e9e90b44dfbfd562f2f9965e51351..888addd1bf0844abd6eb410025b95b624b6b185d 100644 (file)
--- a/pdf.js
+++ b/pdf.js
@@ -64,6 +64,14 @@ function stringToBytes(str) {
    return bytes;
  }
  
+// Packs byte pairs of |str| (high byte first) into 16-bit chars; odd tail -> low 0.
+function singleByteToMultiByteString (str) {
+  var multiByteStr = "";
+  var bytes = stringToBytes(str);
+  for (var j = 0; j < bytes.length; j += 2) // shift by 8: fromCharCode keeps 16 bits
+    multiByteStr += String.fromCharCode((bytes[j] << 8) | bytes[j + 1]);
+  return multiByteStr;
+}
  var Stream = (function() {
    function constructor(arrayBuffer, start, length, dict) {
      this.bytes = new Uint8Array(arrayBuffer);
@@ -3624,19 +3632,26 @@ var PartialEvaluator = (function() {
      },
  
      translateFont: function(fontDict, xref, resources) {
-      var fd = fontDict.get('FontDescriptor');
-      if (!fd)
+      var fd;
+      var descendant = [];
+      var subType = fontDict.get('Subtype');
+      assertWellFormed(IsName(subType), 'invalid font Subtype');
+      
+      //If font is a composite get the FontDescriptor from the descendant font
+      if (subType.name == "Type0")
        {
-        //If font is a composite get the FontDescriptor from the descendant
          var df = fontDict.get("DescendantFonts");
          if (!df)
            return null;
-        var descendant = xref.fetch(df[0]);
+        descendant = xref.fetch(df[0]);
          fd = descendant.get("FontDescriptor");
-        if (!fd)
-          return null;
-        fontDict.set("FontDescriptor", fd);
+      } else {
+        fd = fontDict.get('FontDescriptor');
        }
+      
+      if (!fd)
+        return null;
+      
        var descriptor = xref.fetch(fd);
  
        var fontName = descriptor.get('FontName');
@@ -3650,7 +3665,32 @@ var PartialEvaluator = (function() {
  
        var encodingMap = {};
        var charset = [];
-      if (fontDict.has('Encoding')) {
+      if (subType.name == 'Type0') {
+        //XXX CIDFont support - only identity CID Encoding for now
+        var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
+        if (IsName(encoding)) {
+          //Encoding is a predefined CMap
+          if (encoding.name == 'Identity-H') {
+            if (descendant.get('Subtype').name == 'CIDFontType2')
+            {
+              //Extract an encoding from the CIDToGIDMap
+              var glyphsStream = xref.fetchIfRef(descendant.get('CIDToGIDMap'));
+              var glyphsData = glyphsStream.getBytes(0);
+              var i = 0;
+              for (var j=0; j<glyphsData.length; j++) {
+                var glyphID = (glyphsData[j++]*0x100)+glyphsData[j];
+                //encodingMap[glyphID] = i++;
+                charset.push(glyphID);
+              }
+              encoding[0] = 0;
+            }
+          } else {
+            TODO ('Need to support predefined CMaps see PDF 32000-1:2008 9.7.5.2 Predefined CMaps')
+          }
+        } else {
+          TODO ('Need to support encoding streams see PDF 32000-1:2008  9.7.5.3'); 
+        }
+      } else if (fontDict.has('Encoding')) {
          var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
          if (IsDict(encoding)) {
            // Build a map of between codes and glyphs
@@ -3682,7 +3722,6 @@ var PartialEvaluator = (function() {
            }
          } else if (IsName(encoding)) {
            var encoding = Encodings[encoding.name];
-          //XXX CIDFont support - get the CID Encoding especially support japan1 and identity
            if (!encoding)
              error('Unknown font encoding');
  
@@ -3767,9 +3806,6 @@ var PartialEvaluator = (function() {
          }
        }
  
-      var subType = fontDict.get('Subtype');
-      assertWellFormed(IsName(subType), 'invalid font Subtype');
-
        var properties = {
          type: subType.name,
          encoding: encodingMap,
author	Adil Allawi <adil@diwan.com>
	Mon, 11 Jul 2011 16:41:47 +0000 (17:41 +0100)
committer	Adil Allawi <adil@diwan.com>
	Mon, 11 Jul 2011 16:41:55 +0000 (17:41 +0100)