From: notmasteryet Date: Sat, 12 Nov 2011 17:09:19 +0000 (-0600) Subject: Merge branch 'master' of git://github.com/mozilla/pdf.js.git into encoding-2 X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=60744a8a61a61f53f2f6d67d19a17580eb23b618;p=pdf.js.git Merge branch 'master' of git://github.com/mozilla/pdf.js.git into encoding-2 Conflicts: src/evaluator.js src/fonts.js --- 60744a8a61a61f53f2f6d67d19a17580eb23b618 diff --cc src/evaluator.js index bb2efaa,064288c..619a633 --- a/src/evaluator.js +++ b/src/evaluator.js @@@ -512,174 -559,169 +512,175 @@@ var PartialEvaluator = (function partia error('Encoding is not a Name nor a Dict'); } } + properties.differences = differences; + properties.baseEncoding = baseEncoding; + properties.hasEncoding = hasEncoding; + }, - if (!baseEncoding) { - switch (type) { - case 'TrueType': - baseEncoding = Encodings.WinAnsiEncoding.slice(); - break; - case 'Type1': - case 'Type3': - baseEncoding = Encodings.StandardEncoding.slice(); - break; - default: - warn('Unknown type of font: ' + type); - baseEncoding = []; - break; - } - } - - // merge in the differences - var firstChar = properties.firstChar; - var lastChar = properties.lastChar; - var widths = properties.widths || []; - var glyphs = {}; - for (var i = firstChar; i <= lastChar; i++) { - var glyph = differences[i]; - var replaceGlyph = true; - if (!glyph) { - glyph = baseEncoding[i] || i; - replaceGlyph = false; - } - var index = GlyphsUnicode[glyph] || i; - var width = widths[i] || widths[glyph]; - map[i] = { - unicode: index, - width: isNum(width) ? width : properties.defaultWidth - }; - - if (replaceGlyph || !glyphs[glyph]) - glyphs[glyph] = map[i]; - if (replaceGlyph || !glyphs[index]) - glyphs[index] = map[i]; - - // If there is no file, the character mapping can't be modified - // but this is unlikely that there is any standard encoding with - // chars below 0x1f, so that's fine. - if (!properties.file) - continue; + readToUnicode: + function partialEvaluatorReadToUnicode(toUnicode, xref) { + var cmapObj = xref.fetchIfRef(toUnicode); + var charToUnicode = []; + if (isName(cmapObj)) { + error('ToUnicode file cmap translation not implemented'); + } else if (isStream(cmapObj)) { + var tokens = []; + var token = ''; + var beginArrayToken = {}; + + var cmap = cmapObj.getBytes(cmapObj.length); - for (var i = 0; i < cmap.length; i++) { ++ for (var i = 0, ii = cmap.length; i < ii; i++) { + var byte = cmap[i]; + if (byte == 0x20 || byte == 0x0D || byte == 0x0A || + byte == 0x3C || byte == 0x5B || byte == 0x5D) { + switch (token) { + case 'usecmap': + error('usecmap is not implemented'); + break; - if (index <= 0x1f || (index >= 127 && index <= 255)) - map[i].unicode += kCmapGlyphOffset; - } + case 'beginbfchar': + case 'beginbfrange': + case 'begincidchar': + case 'begincidrange': + token = ''; + tokens = []; + break; - if (type == 'TrueType' && dict.has('ToUnicode') && differences) { - var cmapObj = dict.get('ToUnicode'); - if (isRef(cmapObj)) { - cmapObj = xref.fetch(cmapObj); - } - if (isName(cmapObj)) { - error('ToUnicode file cmap translation not implemented'); - } else if (isStream(cmapObj)) { - var tokens = []; - var token = ''; - var beginArrayToken = {}; - - var cmap = cmapObj.getBytes(cmapObj.length); - for (var i = 0, ii = cmap.length; i < ii; i++) { - var byte = cmap[i]; - if (byte == 0x20 || byte == 0x0D || byte == 0x0A || - byte == 0x3C || byte == 0x5B || byte == 0x5D) { - switch (token) { - case 'usecmap': - error('usecmap is not implemented'); - break; - - case 'beginbfchar': - case 'beginbfrange': - case 'begincidchar': - case 'begincidrange': - token = ''; - tokens = []; - break; - - case 'endcidrange': - case 'endbfrange': - for (var j = 0, jj = tokens.length; j < jj; j += 3) { - var startRange = tokens[j]; - var endRange = tokens[j + 1]; - var code = tokens[j + 2]; - while (startRange < endRange) { - var mapping = map[startRange] || {}; - mapping.unicode = code++; - map[startRange] = mapping; - ++startRange; - } - } - break; - - case 'endcidchar': - case 'endbfchar': - for (var j = 0, jj = tokens.length; j < jj; j += 2) { - var index = tokens[j]; - var code = tokens[j + 1]; - var mapping = map[index] || {}; - mapping.unicode = code; - map[index] = mapping; + case 'endcidrange': + case 'endbfrange': - for (var j = 0; j < tokens.length; j += 3) { ++ for (var j = 0, jj = tokens.length; j < jj; j += 3) { + var startRange = tokens[j]; + var endRange = tokens[j + 1]; + var code = tokens[j + 2]; + while (startRange <= endRange) { + charToUnicode[startRange] = code++; + ++startRange; } - break; + } + break; - case '': - break; + case 'endcidchar': + case 'endbfchar': - for (var j = 0; j < tokens.length; j += 2) { ++ for (var j = 0, jj = tokens.length; j < jj; j += 2) { + var index = tokens[j]; + var code = tokens[j + 1]; + charToUnicode[index] = code; + } + break; - default: - if (token[0] >= '0' && token[0] <= '9') - token = parseInt(token, 10); // a number - tokens.push(token); - token = ''; - } - switch (byte) { - case 0x5B: - // begin list parsing - tokens.push(beginArrayToken); - break; - case 0x5D: - // collect array items - var items = [], item; - while (tokens.length && - (item = tokens.pop()) != beginArrayToken) - items.unshift(item); - tokens.push(items); - break; - } - } else if (byte == 0x3E) { - if (token.length) { - // parsing hex number - tokens.push(parseInt(token, 16)); + case '': + break; + + default: + if (token[0] >= '0' && token[0] <= '9') + token = parseInt(token, 10); // a number + tokens.push(token); token = ''; - } - } else { - token += String.fromCharCode(byte); } + switch (byte) { + case 0x5B: + // begin list parsing + tokens.push(beginArrayToken); + break; + case 0x5D: + // collect array items + var items = [], item; + while (tokens.length && + (item = tokens.pop()) != beginArrayToken) + items.unshift(item); + tokens.push(items); + break; + } + } else if (byte == 0x3E) { + if (token.length) { + // parsing hex number + tokens.push(parseInt(token, 16)); + token = ''; + } + } else { + token += String.fromCharCode(byte); } } } - return glyphs; + return charToUnicode; }, + readCidToGidMap: + function partialEvaluatorReadCidToGidMap(cidToGidStream) { + // Extract the encoding from the CIDToGIDMap + var glyphsData = cidToGidStream.getBytes(); + + // Set encoding 0 to later verify the font has an encoding + var result = []; - for (var j = 0; j < glyphsData.length; j++) { ++ for (var j = 0, jj = glyphsData.length; j < jj; j++) { + var glyphID = (glyphsData[j++] << 8) | glyphsData[j]; + if (glyphID == 0) + continue; - getBaseFontMetricsAndMap: function getBaseFontMetricsAndMap(name) { - var map = {}; - if (/^Symbol(-?(Bold|Italic))*$/.test(name)) { - // special case for symbols - var encoding = Encodings.symbolsEncoding.slice(); - for (var i = 0, n = encoding.length, j; i < n; i++) { - j = encoding[i]; - if (!j) - continue; - map[i] = GlyphsUnicode[j] || 0; - } + var code = j >> 1; + result[code] = glyphID; } + return result; + }, + extractWidths: function partialEvaluatorWidths(dict, + xref, + descriptor, + properties) { + var glyphsWidths = []; var defaultWidth = 0; - var widths = Metrics[stdFontMap[name] || name]; - if (isNum(widths)) { - defaultWidth = widths; - widths = null; + if (properties.composite) { + defaultWidth = xref.fetchIfRef(dict.get('DW')) || 1000; + + var widths = xref.fetchIfRef(dict.get('W')); + if (widths) { + var start = 0, end = 0; - for (var i = 0; i < widths.length; i++) { ++ for (var i = 0, ii = widths.length; i < ii; i++) { + var code = widths[i]; + if (isArray(code)) { - for (var j = 0; j < code.length; j++) ++ for (var j = 0, jj = code.length; j < jj; j++) + glyphsWidths[start++] = code[j]; + start = 0; + } else if (start) { + var width = widths[++i]; + for (var j = start; j <= code; j++) + glyphsWidths[j] = width; + start = 0; + } else { + start = code; + } + } + } + } else { + var firstChar = properties.firstChar; + var widths = xref.fetchIfRef(dict.get('Widths')); + if (widths) { - for (var i = 0, j = firstChar; i < widths.length; i++, j++) - glyphsWidths[j] = widths[i]; ++ var j = firstChar; ++ for (var i = 0, ii = widths.length; i < ii; i++) ++ glyphsWidths[j++] = widths[i]; + defaultWidth = parseFloat(descriptor.get('MissingWidth')) || 0; + } else { + // Trying get the BaseFont metrics (see comment above). + var baseFontName = dict.get('BaseFont'); + if (isName(baseFontName)) { + var metrics = this.getBaseFontMetrics(baseFontName.name); + + glyphsWidths = metrics.widths; + defaultWidth = metrics.defaultWidth; + } + } + } + + properties.defaultWidth = defaultWidth; + properties.widths = glyphsWidths; + }, + + getBaseFontMetrics: function getBaseFontMetrics(name) { + var defaultWidth = 0, widths = []; + var glyphWidths = Metrics[stdFontMap[name] || name]; + if (isNum(glyphWidths)) { + defaultWidth = glyphWidths; + } else { + widths = glyphWidths; } return { @@@ -758,8 -802,27 +759,7 @@@ // TODO Fill the width array depending on which of the base font this is // a variant. var firstChar = xref.fetchIfRef(dict.get('FirstChar')) || 0; - var lastChar = xref.fetchIfRef(dict.get('LastChar')) || 256; - var defaultWidth = 0; - var glyphWidths = {}; - var encoding = {}; - var widths = xref.fetchIfRef(dict.get('Widths')); - if (widths) { - for (var i = 0, j = firstChar, ii = widths.length; i < ii; i++, j++) - glyphWidths[j] = widths[i]; - defaultWidth = parseFloat(descriptor.get('MissingWidth')) || 0; - } else { - // Trying get the BaseFont metrics (see comment above). - var baseFontName = dict.get('BaseFont'); - if (isName(baseFontName)) { - var metricsAndMap = this.getBaseFontMetricsAndMap(baseFontName.name); - - glyphWidths = metricsAndMap.widths; - defaultWidth = metricsAndMap.defaultWidth; - encoding = metricsAndMap.map; - } - } - + var lastChar = xref.fetchIfRef(dict.get('LastChar')) || maxCharIndex; - var fontName = xref.fetchIfRef(descriptor.get('FontName')); assertWellFormed(isName(fontName), 'invalid font name'); diff --cc src/fonts.js index c3fcb3c,ca02bb0..116bb4d --- a/src/fonts.js +++ b/src/fonts.js @@@ -672,46 -672,8 +672,46 @@@ var UnicodeRanges = { 'begin': 0x1F030, 'end': 0x1F09F } // Domino Tiles ]; +var MacStandardGlyphOrdering = [ + '.notdef', '.null', 'nonmarkingreturn', 'space', 'exclam', 'quotedbl', + 'numbersign', 'dollar', 'percent', 'ampersand', 'quotesingle', 'parenleft', + 'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period', 'slash', + 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', + 'nine', 'colon', 'semicolon', 'less', 'equal', 'greater', 'question', 'at', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', + 'backslash', 'bracketright', 'asciicircum', 'underscore', 'grave', 'a', 'b', + 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', + 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright', + 'asciitilde', 'Adieresis', 'Aring', 'Ccedilla', 'Eacute', 'Ntilde', + 'Odieresis', 'Udieresis', 'aacute', 'agrave', 'acircumflex', 'adieresis', + 'atilde', 'aring', 'ccedilla', 'eacute', 'egrave', 'ecircumflex', 'edieresis', + 'iacute', 'igrave', 'icircumflex', 'idieresis', 'ntilde', 'oacute', 'ograve', + 'ocircumflex', 'odieresis', 'otilde', 'uacute', 'ugrave', 'ucircumflex', + 'udieresis', 'dagger', 'degree', 'cent', 'sterling', 'section', 'bullet', + 'paragraph', 'germandbls', 'registered', 'copyright', 'trademark', 'acute', + 'dieresis', 'notequal', 'AE', 'Oslash', 'infinity', 'plusminus', 'lessequal', + 'greaterequal', 'yen', 'mu', 'partialdiff', 'summation', 'product', 'pi', + 'integral', 'ordfeminine', 'ordmasculine', 'Omega', 'ae', 'oslash', + 'questiondown', 'exclamdown', 'logicalnot', 'radical', 'florin', + 'approxequal', 'Delta', 'guillemotleft', 'guillemotright', 'ellipsis', + 'nonbreakingspace', 'Agrave', 'Atilde', 'Otilde', 'OE', 'oe', 'endash', + 'emdash', 'quotedblleft', 'quotedblright', 'quoteleft', 'quoteright', + 'divide', 'lozenge', 'ydieresis', 'Ydieresis', 'fraction', 'currency', + 'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'daggerdbl', 'periodcentered', + 'quotesinglbase', 'quotedblbase', 'perthousand', 'Acircumflex', + 'Ecircumflex', 'Aacute', 'Edieresis', 'Egrave', 'Iacute', 'Icircumflex', + 'Idieresis', 'Igrave', 'Oacute', 'Ocircumflex', 'apple', 'Ograve', 'Uacute', + 'Ucircumflex', 'Ugrave', 'dotlessi', 'circumflex', 'tilde', 'macron', + 'breve', 'dotaccent', 'ring', 'cedilla', 'hungarumlaut', 'ogonek', 'caron', + 'Lslash', 'lslash', 'Scaron', 'scaron', 'Zcaron', 'zcaron', 'brokenbar', + 'Eth', 'eth', 'Yacute', 'yacute', 'Thorn', 'thorn', 'minus', 'multiply', + 'onesuperior', 'twosuperior', 'threesuperior', 'onehalf', 'onequarter', + 'threequarters', 'franc', 'Gbreve', 'gbreve', 'Idotaccent', 'Scedilla', + 'scedilla', 'Cacute', 'cacute', 'Ccaron', 'ccaron', 'dcroat']; + function getUnicodeRangeFor(value) { - for (var i = 0; i < UnicodeRanges.length; i++) { + for (var i = 0, ii = UnicodeRanges.length; i < ii; i++) { var range = UnicodeRanges[i]; if (value >= range.begin && value < range.end) return i; @@@ -1868,8 -1657,9 +1868,8 @@@ var Font = (function Font() // Horizontal metrics 'hmtx': (function fontFieldsHmtx() { var hmtx = '\x00\x00\x00\x00'; // Fake .notdef - for (var i = 0; i < charstrings.length; i++) - for (var i = 0, ii = charstrings.length; i < ii; i++) { ++ for (var i = 0, ii = charstrings.length; i < ii; i++) hmtx += string16(charstrings[i].width) + string16(0); - } return stringToArray(hmtx); })(), @@@ -1916,17 -1707,42 +1916,17 @@@ cidToUnicode = CIDToUnicodeMaps[ cidSystemInfo.registry + '-' + cidSystemInfo.ordering]; } - if (!cidToUnicode) { - // the font is directly characters to glyphs with no encoding - // so create an identity encoding - for (i = 0; i < 0xD800; i++) { - var width = glyphsWidths[i]; - encoding[i] = { - unicode: i, - width: isNum(width) ? width : defaultWidth - }; - } - // skipping surrogates + 256-user defined - for (i = 0xE100; i <= 0xFFFF; i++) { - var width = glyphsWidths[i]; - encoding[i] = { - unicode: i, - width: isNum(width) ? width : defaultWidth - }; - } - return; - } - encoding[0] = { unicode: 0, width: 0 }; - var glyph = 1, i, j, k, cidLength, ii; + if (!cidToUnicode) + return; // identity encoding + - var glyph = 1, i, j, k; - for (i = 0; i < cidToUnicode.length; ++i) { ++ var glyph = 1, i, j, k, ii; + for (i = 0, ii = cidToUnicode.length; i < ii; ++i) { var unicode = cidToUnicode[i]; - var width; if (isArray(unicode)) { var length = unicode.length; - width = glyphsWidths[glyph]; - for (j = 0; j < length; j++) { - k = unicode[j]; - encoding[k] = { - unicode: k, - width: isNum(width) ? width : defaultWidth - }; - } + for (j = 0; j < length; j++) + cidToUnicodeMap[unicode[j]] = glyph; glyph++; } else if (typeof unicode === 'object') { var fillLength = unicode.f; @@@ -2081,9 -1843,16 +2080,9 @@@ } } else { - for (var i = 0; i < chars.length; ++i) { + for (var i = 0, ii = chars.length; i < ii; ++i) { var charcode = chars.charCodeAt(i); - var glyph = encoding[charcode]; - if ('undefined' == typeof(glyph)) { - warn('Unencoded charcode ' + charcode); - glyph = { - unicode: charcode, - width: this.defaultWidth - }; - } + var glyph = this.charToGlyph(charcode); glyphs.push(glyph); if (charcode == 0x20) glyphs.push(null); @@@ -3130,43 -2863,52 +3130,43 @@@ var Type2CFF = (function type2CFF() getCharStrings: function cff_charstrings(charsets, encoding, privateDict, properties) { - var defaultWidth = privateDict['defaultWidthX']; var charstrings = []; - var firstChar = properties.firstChar; - var glyphMap = {}; + var unicodeUsed = []; + var unassignedUnicodeItems = []; - for (var i = 0; i < charsets.length; i++) { + for (var i = 0, ii = charsets.length; i < ii; i++) { var glyph = charsets[i]; + var encodingFound = false; for (var charcode in encoding) { - if (encoding[charcode] == i) - glyphMap[glyph] = charcode | 0; + if (encoding[charcode] == i) { + var code = charcode | 0; + charstrings.push({ + unicode: adaptUnicode(code), + code: code, + gid: i, + glyph: glyph + }); + unicodeUsed[code] = true; + encodingFound = true; + break; + } + } + if (!encodingFound) { + unassignedUnicodeItems.push(i); } } - var differences = properties.differences; - for (var i = 0, ii = differences.length; i < ii; ++i) { - var glyph = differences[i]; - if (!glyph) - continue; - var oldGlyph = charsets[i]; - if (oldGlyph) - delete glyphMap[oldGlyph]; - glyphMap[differences[i]] = i; - } - - var glyphs = properties.glyphs; - for (var i = 1, ii = charsets.length; i < ii; i++) { - var glyph = charsets[i]; - var code = glyphMap[glyph] || 0; - - var mapping = glyphs[code] || glyphs[glyph] || { width: defaultWidth }; - var unicode = mapping.unicode; - - if (unicode <= 0x1f || (unicode >= 127 && unicode <= 255)) - unicode += kCmapGlyphOffset; - - var width = (mapping.hasOwnProperty('width') && isNum(mapping.width)) ? - mapping.width : defaultWidth; - properties.encoding[code] = { - unicode: unicode, - width: width - }; - + var nextUnusedUnicode = 0x21; - for (var j = 0; j < unassignedUnicodeItems.length; ++j) { ++ for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; ++j) { + var i = unassignedUnicodeItems[j]; + // giving unicode value anyway + while (unicodeUsed[nextUnusedUnicode]) + nextUnusedUnicode++; + var code = nextUnusedUnicode++; charstrings.push({ - unicode: unicode, - width: width, + unicode: adaptUnicode(code), code: code, - gid: i + gid: i, + glyph: charsets[i] }); } @@@ -3197,9 -2964,9 +3197,9 @@@ if (pos == 0 || pos == 1) { var gid = 1; - var baseEncoding = pos ? Encodings.ExpertEncoding.slice() : - Encodings.StandardEncoding.slice(); + var baseEncoding = pos ? Encodings.ExpertEncoding : + Encodings.StandardEncoding; - for (var i = 0; i < charset.length; i++) { + for (var i = 0, ii = charset.length; i < ii; i++) { var index = baseEncoding.indexOf(charset[i]); if (index != -1) encoding[index] = gid++;