: eval 'exec perl -S $0 "$@"' if 0; =begin COPYRIGHT ---------------------------------------------------------------------------- Copyright (C) 2005-2013 Marc Penninga. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA ---------------------------------------------------------------------------- =end COPYRIGHT =cut use strict; use warnings; use integer; use List::Util @List::Util::EXPORT_OK; use Pod::Usage; our ($NUM_GLYPHS, $UNITS_PER_EM, %KPX); sub main { pod2usage(-verbose => 0) if @ARGV != 1; my %font = OTF::get_tables($ARGV[0]); $NUM_GLYPHS = unpack '@4n', $font{maxp}; $UNITS_PER_EM = unpack '@18n', $font{head}; my @glyph_name = map { sprintf "index%d", $_ } 0 .. $NUM_GLYPHS - 1; if (defined $font{CFF}) { OTF::CFF::get_glyph_names($font{CFF}, \@glyph_name); } if (defined $font{kern}) { OTF::Kern::parse_kerntable($font{kern}); } if (defined $font{GPOS}) { my @lookup_list_indices = OTF::GPOS::get_lookup_list_indices($font{GPOS}); my @all_lookups = OTF::GPOS::get_lookups($font{GPOS}); my @lookups = @all_lookups[@lookup_list_indices]; my @subtables = map { OTF::GPOS::get_subtables($_) } @lookups; for my $subtable (@subtables) { my $pos_format = unpack 'n', $subtable; if ($pos_format == 1) { OTF::GPOS::parse_pos_format_1($subtable); } elsif ($pos_format == 2) { OTF::GPOS::parse_pos_format_2($subtable); } else { warn "[ERROR] invalid PosFormat $pos_format ignored"; } } } my $num_kernpairs = sum map { scalar keys %{$KPX{$_}} } keys %KPX; print "StartKernData\nStartKernPairs $num_kernpairs\n"; for my $first (sort { $a <=> $b } keys %KPX) { my $first_glyph = $glyph_name[$first]; for my $second (sort { $a <=> $b } keys %{$KPX{$first}}) { print "KPX $first_glyph $glyph_name[$second] ", "$KPX{$first}{$second}\n"; } } print "EndKernPairs\nEndKernData\n"; return; } ######################################################################## package OTF; #----------------------------------------------------------------------- # Return a hash with all tables (as strings) from the font #----------------------------------------------------------------------- sub get_tables { my $filename = shift; open my $fontfile, '<:raw', $filename or die "[ERROR] can't open $filename: $!"; my $fontdata = do { local $/; <$fontfile> }; close $fontfile; my ($sfnt_version, @tables) = unpack 'a4nx6/(a16)', $fontdata; die "[ERROR] $filename: unknown font type" unless $sfnt_version eq "\x00\x01\x00\x00" || $sfnt_version eq 'OTTO'; my %table = map { my ($tag, $offset, $length) = unpack 'A4@8N@12N', $_; ($tag => substr $fontdata, $offset, $length); } @tables; return %table; } ######################################################################## package OTF::CFF; my @STANDARD_STRINGS = qw( .notdef space exclam quotedbl numbersign dollar percent ampersand quoteright parenleft parenright asterisk plus comma hyphen period slash zero one two three four five six seven eight nine colon semicolon less equal greater question at A B C D E F G H I J K L M N O P Q R S T U V W X Y Z bracketleft backslash bracketright asciicircum underscore quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z braceleft bar braceright asciitilde exclamdown cent sterling fraction yen florin section currency quotesingle quotedblleft guillemotleft guilsinglleft guilsinglright fi fl endash dagger daggerdbl periodcentered paragraph bullet quotesinglbase quotedblbase quotedblright guillemotright ellipsis perthousand questiondown grave acute circumflex tilde macron breve dotaccent dieresis ring cedilla hungarumlaut ogonek caron emdash AE ordfeminine Lslash Oslash OE ordmasculine ae dotlessi lslash oslash oe germandbls onesuperior logicalnot mu trademark Eth onehalf plusminus Thorn onequarter divide brokenbar degree thorn threequarters twosuperior registered minus eth multiply threesuperior copyright Aacute Acircumflex Adieresis Agrave Aring Atilde Ccedilla Eacute Ecircumflex Edieresis Egrave Iacute Icircumflex Idieresis Igrave Ntilde Oacute Ocircumflex Odieresis Ograve Otilde Scaron Uacute Ucircumflex Udieresis Ugrave Yacute Ydieresis Zcaron aacute acircumflex adieresis agrave aring atilde ccedilla eacute ecircumflex edieresis egrave iacute icircumflex idieresis igrave ntilde oacute ocircumflex odieresis ograve otilde scaron uacute ucircumflex udieresis ugrave yacute ydieresis zcaron exclamsmall Hungarumlautsmall dollaroldstyle dollarsuperior ampersandsmall Acutesmall parenleftsuperior parenrightsuperior twodotenleader onedotenleader zerooldstyle oneoldstyle twooldstyle threeoldstyle fouroldstyle fiveoldstyle sixoldstyle sevenoldstyle eightoldstyle nineoldstyle commasuperior threequartersemdash periodsuperior questionsmall asuperior bsuperior centsuperior dsuperior esuperior isuperior lsuperior msuperior nsuperior osuperior rsuperior ssuperior tsuperior ff ffi ffl parenleftinferior parenrightinferior Circumflexsmall hyphensuperior Gravesmall Asmall Bsmall Csmall Dsmall Esmall Fsmall Gsmall Hsmall Ismall Jsmall Ksmall Lsmall Msmall Nsmall Osmall Psmall Qsmall Rsmall Ssmall Tsmall Usmall Vsmall Wsmall Xsmall Ysmall Zsmall colonmonetary onefitted rupiah Tildesmall exclamdownsmall centoldstyle Lslashsmall Scaronsmall Zcaronsmall Dieresissmall Brevesmall Caronsmall Dotaccentsmall Macronsmall figuredash hypheninferior Ogoneksmall Ringsmall Cedillasmall questiondownsmall oneeighth threeeighths fiveeighths seveneighths onethird twothirds zerosuperior foursuperior fivesuperior sixsuperior sevensuperior eightsuperior ninesuperior zeroinferior oneinferior twoinferior threeinferior fourinferior fiveinferior sixinferior seveninferior eightinferior nineinferior centinferior dollarinferior periodinferior commainferior Agravesmall Aacutesmall Acircumflexsmall Atildesmall Adieresissmall Aringsmall AEsmall Ccedillasmall Egravesmall Eacutesmall Ecircumflexsmall Edieresissmall Igravesmall Iacutesmall Icircumflexsmall Idieresissmall Ethsmall Ntildesmall Ogravesmall Oacutesmall Ocircumflexsmall Otildesmall Odieresissmall OEsmall Oslashsmall Ugravesmall Uacutesmall Ucircumflexsmall Udieresissmall Yacutesmall Thornsmall Ydieresissmall 001.000 001.001 001.002 001.003 Black Bold Book Light Medium Regular Roman Semibold ); #----------------------------------------------------------------------- # #----------------------------------------------------------------------- sub get_glyph_names { my ($CFF, $glyph_name) = @_; # skip header; parse (and ignore) NAME index my ($rest) = _parse_index(substr $CFF, unpack '@2C', $CFF); # parse TopDict index ($rest, my @top_dict) = _parse_index($rest); if (@top_dict > 1) { warn "[WARNING] multiple fonts in single file not supported"; } my %top_dict = _parse_dict($top_dict[0]); if ($NUM_GLYPHS != unpack 'n', substr($CFF, $top_dict{17}, 2)) { die "[ERROR] NumGlyphs different in 'maxp' and 'CFF' tables"; } # parse String index ($rest, my @strings) = _parse_index($rest); @strings = (@STANDARD_STRINGS, @strings); my $charset = substr $CFF, $top_dict{15}; my $format = unpack 'C', $charset; if ($format == 0) { my @codes = unpack "\@1n@{[ $NUM_GLYPHS - 1 ]}", $charset; @$glyph_name = @strings[0, @codes]; } elsif ($format == 1) { my $i = 0; $glyph_name->[$i++] = $strings[0]; for (my $j = 0; $i < $NUM_GLYPHS; $j++) { my ($first, $n_left) = unpack "\@@{[ 1 + 3*$j ]}nC", $charset; $glyph_name->[$i + $_] = $strings[$first + $_] for 0 .. $n_left; $i += 1 + $n_left; } } elsif ($format == 2) { my $i = 0; $glyph_name->[$i++] = $strings[0]; for (my $j = 0; $i < $NUM_GLYPHS; $j++) { my ($first, $n_left) = unpack "\@@{[ 1 + 4*$j ]}nn", $charset; $glyph_name->[$i + $_] = $strings[$first + $_] for 0 .. $n_left; $i += 1 + $n_left; } } else { warn "[ERROR] invalid CharsetFormat '$format' ignored"; } return; } #----------------------------------------------------------------------- # Parse Adobe's INDEX format (see CFF spec); return "rest" plus data array #----------------------------------------------------------------------- sub _parse_index { my $index = shift; my ($count, $off_size) = unpack 'nC', $index; my @offsets = unpack "\@3(a$off_size)@{[ $count + 1 ]}", $index; if ($off_size == 1) { $_ = unpack 'C', $_ for @offsets } elsif ($off_size == 2) { $_ = unpack 'n', $_ for @offsets } elsif ($off_size == 4) { $_ = unpack 'N', $_ for @offsets } elsif ($off_size == 3) { for my $offset (@offsets) { my @bytes = map 'C3', $offset; $offset = ($bytes[0] << 16) + ($bytes[1] << 8) + $bytes[2]; } } else { warn "[ERROR] invalid OffSize $off_size ignored"; } $_ += 2 + $off_size * ($count + 1) for @offsets; my @data; for my $i (0 .. $count - 1) { $data[$i] = substr $index, $offsets[$i], $offsets[$i + 1] - $offsets[$i]; } return substr($index, $offsets[$count]), @data; } #----------------------------------------------------------------------- # Parse Adobe's DICT format (see CFF spec); return data hash #----------------------------------------------------------------------- sub _parse_dict { my $dict = shift; my @bytes = unpack 'C*', $dict; my (@operands, %dict); while (@bytes) { if (( 0 <= $bytes[0] && $bytes[0] <= 11) || (13 <= $bytes[0] && $bytes[0] <= 22)) { my $operator = shift @bytes; ($dict{$operator} = join q{ }, @operands) =~ s/^\s+//xms; @operands = (); } elsif ($bytes[0] == 12) { my $operator = join q{ }, splice @bytes, 0, 2; ($dict{$operator} = join q{ }, @operands) =~ s/^\s+//xms; @operands = (); } elsif ($bytes[0] == 28 || $bytes[0] == 29 || $bytes[0] == 30 || (32 <= $bytes[0] && $bytes[0] <= 254)) { push @operands, _get_number(\@bytes); } else { warn "[ERROR] invalid byte $bytes[0] in DICT ignored"; shift @bytes; } } return %dict; } #----------------------------------------------------------------------- # Parse Adobe's variable-length numbers (see CFF spec); shorten arg array #----------------------------------------------------------------------- sub _get_number { my $bytes = shift; my $b0 = shift @$bytes; if ($b0 == 28) { my ($b1, $b2) = splice @$bytes, 0, 2; return ($b1 << 8) | $b2; } elsif ($b0 == 29) { my ($b1, $b2, $b3, $b4) = splice @$bytes, 0, 4; return ((((($b1 << 8) + $b2) << 8) + $b3) << 8) + $b4; } elsif ($b0 == 30) { # Reals not implemented; just dump appropriate number of bytes until (($b0 & 0xf0) == 0xf0 || ($b0 & 0xf) == 0xf) { $b0 = shift @$bytes; } } elsif (32 <= $b0 && $b0 <= 246) { return $b0 - 139; } elsif (247 <= $b0 && $b0 <= 250) { my $b1 = shift @$bytes; return 256 * ($b0 - 247) + $b1 + 108; } elsif (251 <= $b0 && $b0 <= 254) { my $b1 = shift @$bytes; return -256 * ($b0 - 251) - $b1 - 108; } } ######################################################################## package OTF::GPOS; #----------------------------------------------------------------------- # Return list of all Lookups from the GPOS table #----------------------------------------------------------------------- sub get_lookups { my $GPOS = shift; my $lookup_list = substr $GPOS, unpack '@8n', $GPOS; my @lookup_offsets = unpack 'n/n', $lookup_list; return map { substr $lookup_list, $_ } @lookup_offsets; } #----------------------------------------------------------------------- # Return list of all LookupListIndices pointed to by 'kern' feature #----------------------------------------------------------------------- sub get_lookup_list_indices { my $GPOS = shift; my $feature_list = substr $GPOS, unpack '@6n', $GPOS; my %lookup_list_index; for my $feature_record (unpack 'n/(a6)', $feature_list) { my ($tag, $offset) = unpack 'a4n', $feature_record; next if $tag ne 'kern'; my $feature = substr $feature_list, $offset; my @lookup_list_indices = unpack '@2n/n', $feature; $lookup_list_index{$_} = 1 for @lookup_list_indices; } return keys %lookup_list_index; } #----------------------------------------------------------------------- # Return list of all subtables in Lookup table #----------------------------------------------------------------------- sub get_subtables { my $lookup = shift; my ($lookup_type, @subtable_offsets) = unpack 'n@4n/n', $lookup; if ($lookup_type != 2) { warn "[WARNING] LookupType $lookup_type not implemented"; return; } return map { substr $lookup, $_ } @subtable_offsets; } #----------------------------------------------------------------------- # Parse subtable in PairPosFormat 1, store kern pairs in global %KPX #----------------------------------------------------------------------- sub parse_pos_format_1 { my $subtable = shift; my ($coverage_offset, $value_format_1, $value_format_2, @pair_set_offsets) = unpack '@2nnnn/n', $subtable; my @coverage = _get_coverage(substr $subtable, $coverage_offset); my @pair_sets = map { substr $subtable, $_ } @pair_set_offsets; if (@coverage != @pair_sets) { warn "[ERROR] Coverage table doesn't match PairSet table"; return; } my ($record_size, $value_offset) = _parse_value_formats($value_format_1, $value_format_2, 2, 2); while (1) { my $pair_set = shift @pair_sets or last; my $first = shift @coverage; my @pair_value_records = unpack "n/(a$record_size)", $pair_set; for my $pair_value_record (@pair_value_records) { my ($second, $value) = unpack "n\@${value_offset}s>", $pair_value_record; next if $value == 0; $KPX{$first}{$second} ||= 1000 * $value / $UNITS_PER_EM; } } return; } #----------------------------------------------------------------------- # Parse subtable in PairPosFormat 2, store kern pairs in global %KPX #----------------------------------------------------------------------- sub parse_pos_format_2 { my $subtable = shift; my ($coverage_offset, $value_format_1, $value_format_2, $class_def_1, $class_def_2, $class_1_count, $class_2_count) = unpack '@2nnnnnnn', $subtable; my @coverage = _get_coverage(substr $subtable, $coverage_offset); my ($class_2_record_size, $value_offset) = _parse_value_formats($value_format_1, $value_format_2, 0, 0); my @class_1 = _get_class(substr $subtable, $class_def_1); my @class_2 = _get_class(substr $subtable, $class_def_2); my $class_1_record_size = $class_2_count * $class_2_record_size; my @class_1_records = unpack "\@16(a$class_1_record_size)$class_1_count", $subtable; for my $class_1 (0 .. $class_1_count - 1) { my $class_1_record = $class_1_records[$class_1]; my @class_2_records = unpack "(a$class_2_record_size)$class_2_count", $class_1_record; for my $class_2 (0 .. $class_2_count - 1) { my $class_2_record = $class_2_records[$class_2]; my $value = unpack "\@${value_offset}s>", $class_2_record; next if $value == 0; $value = 1000 * $value / $UNITS_PER_EM; my @first = grep { $class_1[$_] == $class_1 } @coverage; my @second = grep { $class_2[$_] == $class_2 } 0 .. $#class_2; for my $first (@first) { for my $second (@second) { $KPX{$first}{$second} ||= $value; } } } } return; } #----------------------------------------------------------------------- # Get class number for all glyphs #----------------------------------------------------------------------- sub _get_class { my $class = shift; my @class = (0) x $NUM_GLYPHS; my $class_format = unpack 'n', $class; if ($class_format == 1) { my ($start_glyph, @class_value) = unpack '@2nn/n', $class; @class[$start_glyph .. $start_glyph + $#class_value] = @class_value; } elsif ($class_format == 2) { my @class_ranges = unpack '@2n/(a6)', $class; for my $class_range (@class_ranges) { my ($start, $end, $class) = unpack 'nnn', $class_range; @class[$start .. $end] = ($class) x ($end - $start + 1); } } else { warn "[ERROR] invalid ClassFormat '$class_format' ignored"; return; } return @class; } #----------------------------------------------------------------------- # Determine size of ValueRecord and offset of XAdvance value in it #----------------------------------------------------------------------- sub _parse_value_formats { my ($value_format_1, $value_format_2, $record_size, $value_offset) = @_; unless ($value_format_1 & 0x4 && $value_format_2 == 0) { warn "[WARNING] ValueFormat ($value_format_1, ", "$value_format_2) not implemented"; return; } for my $bit ((0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80)) { $record_size += 2 if $value_format_1 & $bit; } for my $bit ((0x1, 0x2)) { $value_offset += 2 if $value_format_1 & $bit; } return ($record_size, $value_offset); } #----------------------------------------------------------------------- # Return a list of GlyphIDs in a Coverage table #----------------------------------------------------------------------- sub _get_coverage { my $coverage = shift; my $coverage_format = unpack 'n', $coverage; if ($coverage_format == 1) { return unpack '@2n/n', $coverage; } elsif ($coverage_format == 2) { my @range_records = unpack '@2n/(a6)', $coverage; return map { my ($start, $end) = unpack 'nn', $_; $start .. $end; } @range_records; } else { warn "[WARNING] unknown CoverageFormat $coverage_format ignored"; } } ######################################################################## package OTF::Kern; #----------------------------------------------------------------------- # Parse "kern"table, store kern pairs in global %KPX #----------------------------------------------------------------------- sub parse_kerntable { my $kern = shift; my $num_tables = unpack '@2n', $kern; my $start_subtable = 4; for my $i (0 .. $num_tables - 1) { my $subtable = substr $kern, $start_subtable; my ($length, $coverage) = unpack '@2nn', $subtable; $start_subtable += $length; if ($coverage != 0x1) { warn "[WARNING] kern table format '$coverage' not implemented"; next; } my @kern_pairs = unpack '@6nx6/(a6)', $subtable; for my $kern_pair (@kern_pairs) { my ($first, $second, $value) = unpack 'nns>', $kern_pair; next if $value == 0; $KPX{$first}{$second} ||= 1000 * $value / $UNITS_PER_EM; } } return; } ######################################################################## package main; main(); __END__ ######################################################################## To create the documentation: pod2man --center="Marc Penninga" --release="fontools" --section=1 \ ot2kpx - | groff -Tps -man - | ps2pdf - ot2kpx.pdf =pod =head1 NAME ot2kpx - extract kerning information from OpenType fonts =head1 SYNOPSIS =over 8 =item B B =back =head1 DESCRIPTION B extract the kerning data from OpenType fonts (both F and F formats) and prints it (in F format) to C. =head1 OPTIONS AND ARGUMENTS =over 4 =item B The OpenType font (in either F or F format). =back =head1 RESTRICTIONS =over 4 =item B<-> B doesn't implement all of the OpenType specification. Things that are missing include: support for font files containing multiple fonts, LookupTables with LookupTypes other than 2, "kern" tables with format other than 0 and ValueRecords with other types of data than just XAdvance data. Most of these limitations won't matter, since the missing features are rare (the only fonts I know of that use them are the non-western fonts that come with Adobe Reader). Furthermore, many of these features define (according to the OpenType specification) I<"subtle, device-dependent adjustments at specific font sizes or device resolutions">; since there's no way to express such adjustments in F format, ignoring them seems to be the only option anyway. =item B<-> B collects kerning data first from the "kern" table, then from all LookupTables associated with the "kern" feature; if a kerning pair occurs multiple times, the first value seen is chosen. There are (or may be) several issues with this approach: =over 4 =item - The OpenType specification says that fonts in F format shouldn't use the "kern" table at all, just the lookups from the "GPOS" table. Many such fonts do, however, contain a "kern" table, but no "GPOS" table; so we use the "kern" table anyway. =item - Instead of reading all LookupTables, it might be better to let the user specify a script and language and process only the LookupTables for those values. However, at least in the fonts I checked, all script/language combinations eventually point to the I "kern" LookupTables, so this approach wouldn't make any difference (apart from further complicating the code). =back =back =head1 AUTHOR Marc Penninga =head1 COPYRIGHT Copyright (C) 2005-2013 Marc Penninga. =head1 LICENSE This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. A copy of the GNU General Public License is included with B; see the file F. =head1 DISCLAIMER This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. =head1 RECENT CHANGES (See the source code for the rest of the story.) =over 12 =item I<2013-08-07> Replaced all C constructions in the source code by C's, to avoid warnings about experimental features in Perl 5.18 and later. =item I<2012-02-01> Refactored the code, and fixed a number of bugs in the process. Updated the documentation. =back =begin Really_old_history =over 12 =item I<2005-01-10> First version =item I<2005-02-18> Rewrote some of the code =item I<2005-03-08> Input files searched via B (where available) =item I<2005-03-15> Input files searched using B or B =item I<2005-03-21> Test if GPOS table is present before trying to read it =item I<2005-04-29> Improved the documentation =item I<2005-05-25> Changed warning that's given when the font contains no GPOS table, to an informational message. =item I<2005-07-29> A few updates to the documentation =back =cut