Skip to content

Commit ab59162

Browse files
authored
Merge pull request #17 from mdboom/more-subsetting-fixes
More subsetting fixes
2 parents 6c85d3f + a47d115 commit ab59162

File tree

2 files changed

+212
-9
lines changed

2 files changed

+212
-9
lines changed

doc/source/api.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ OpenType fonts.
5353

5454
.. autosummary::
5555
:toctree: _generated
56-
:template: autosummary/module.rst
56+
:template: autosummary/base.rst
5757

58-
subset
58+
subset.subset_font
5959

6060
Glyph
6161
-----
@@ -122,6 +122,7 @@ TrueType information
122122
TT_Postscript
123123
TT_PLATFORM
124124
TT_APPLE_ID
125+
TT_ISO_ID
125126
TT_MAC_ID
126127
TT_MAC_LANGID
127128
TT_MS_ID

lib/freetypy/subset.py

Lines changed: 209 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
import struct
5555

5656

57-
from freetypy import Face
57+
from freetypy import Face, TT_PLATFORM, TT_ISO_ID, TT_MS_ID
5858

5959

6060
UNDERSTOOD_VERSIONS = (0x00010000, 0x4f54544f)
@@ -307,8 +307,6 @@ def calculate_skip(flags):
307307

308308
i += calculate_skip(flags)
309309

310-
all_glyphs = list(all_glyphs)
311-
all_glyphs.sort()
312310
return all_glyphs
313311

314312
def subset(self, glyphs, offsets):
@@ -349,7 +347,7 @@ def _subset_format2(self, glyphs):
349347
name_index < numglyphs - N_BASIC_NAMES):
350348
needed_indices[name_index - N_BASIC_NAMES] = gind
351349

352-
names = []
350+
names = [b'.removed']
353351
name_index = 0
354352
i += 2 * numglyphs
355353
while i < len(content):
@@ -364,8 +362,8 @@ def _subset_format2(self, glyphs):
364362
name_index += 1
365363

366364
new_content = [content[0:36]]
367-
for i in range(numglyphs):
368-
val = new_glyph_index.get(i, 0)
365+
for i in range(1, numglyphs):
366+
val = new_glyph_index.get(i, N_BASIC_NAMES)
369367
new_content.append(struct.pack('>H', val))
370368

371369
for name in names:
@@ -379,8 +377,205 @@ def subset(self, glyphs):
379377
self.content = self._subset_format2(glyphs)
380378

381379

380+
class _HheaTable(_Table):
    """Parsed view of the ``hhea`` table.

    The header fields are unpacked once at construction time and stored
    as instance attributes named after the entries of
    ``hhea_table_struct`` (for example ``numOfLongHorMetrics``).
    """

    # Field names and struct type codes, listed in on-disk order.
    hhea_table_struct = _BinaryStruct([
        ('version', 'I'),
        # Vertical extents.
        ('ascent', 'h'), ('descent', 'h'), ('lineGap', 'h'),
        # Horizontal extents.
        ('advanceWidthMax', 'H'),
        ('minLeftSideBearing', 'h'), ('minRightSideBearing', 'h'),
        ('xMaxExtent', 'h'),
        # Caret description.
        ('caretSlopeRise', 'h'), ('caretSlopeRun', 'h'),
        ('caretOffset', 'h'),
        # Four reserved slots.
        ('res0', 'h'), ('res1', 'h'), ('res2', 'h'), ('res3', 'h'),
        ('metricDataFormat', 'h'),
        ('numOfLongHorMetrics', 'H'),
    ])

    def __init__(self, header, content):
        """Store the raw table via the base class, then unpack *content*.

        Parameters
        ----------
        header
            Table directory entry, forwarded untouched to ``_Table``.
        content
            Raw bytes of the ``hhea`` table.
        """
        super(_HheaTable, self).__init__(header, content)

        fields = self.hhea_table_struct.unpack(content)
        # Publish every unpacked field as an attribute of this instance.
        vars(self).update(fields)
404+
405+
406+
class _HmtxTable(_Table):
    """``hmtx`` table whose metrics for unused glyphs can be blanked."""

    def subset(self, glyph_set, offsets, hhea):
        """Zero the metrics of every glyph that is not in *glyph_set*.

        In keeping with not changing glyph ids, entries are never
        removed; unused entries are overwritten with zero bytes, which
        should aid in compression.  The table keeps its original size.

        Parameters
        ----------
        glyph_set
            Iterable container of the glyph indices to keep; it is both
            iterated and queried with ``in``.
        offsets
            Glyph offsets from the ``loca`` table; the glyph count is
            taken to be ``len(offsets) - 1``.
        hhea
            Object providing ``numOfLongHorMetrics`` (the parsed
            ``hhea`` table).
        """
        n_glyphs = len(offsets) - 1
        n_long_hor_metrics = hhea.numOfLongHorMetrics
        content = self.content

        # Glyphs past the long-metrics array carry only a left side
        # bearing; their advance width is that of the *last* long
        # metric.  If any such glyph is kept, that advance width must
        # survive even when the last long-metric glyph itself is dropped.
        last_long = n_long_hor_metrics - 1
        trailing_glyph_kept = any(
            n_long_hor_metrics <= gind < n_glyphs for gind in glyph_set)

        new_values = []
        for i in range(n_long_hor_metrics):
            metric = content[i*4:i*4+4]
            if i in glyph_set:
                new_values.append(metric)
            elif i == last_long and trailing_glyph_kept:
                # Keep the shared advance width, blank only the bearing.
                new_values.append(metric[:2] + b'\0\0')
            else:
                new_values.append(b'\0\0\0\0')

        left_side_bearing = content[n_long_hor_metrics*4:]
        for i in range(n_glyphs - n_long_hor_metrics):
            if i + n_long_hor_metrics in glyph_set:
                new_values.append(left_side_bearing[i*2:i*2+2])
            else:
                new_values.append(b'\0\0')

        self.content = b''.join(new_values)
432+
433+
434+
class _CmapTable(_Table):
    """``cmap`` table that can be reduced to its Unicode subtables."""

    cmap_table_struct = _BinaryStruct([
        ('version', 'H'),
        ('numTables', 'H')])

    cmap_subtable_struct = _BinaryStruct([
        ('platformID', 'H'),
        ('encodingID', 'H'),
        ('offset', 'I')])

    class _CmapSubtable(object):
        """One cmap subtable sliced out of the parent table's bytes.

        Attributes set by the constructor:

        - ``offset``: position of the subtable inside the cmap table
          (reassigned by ``_CmapTable.subset`` when the table is rebuilt).
        - ``format``: the subtable format number.
        - ``content`` / ``length``: the subtable bytes and their size.
          Format 12 subtables are subsetted immediately on construction;
          every other format is carried over verbatim.
        """

        format_12_struct = _BinaryStruct([
            ('format', 'H'),
            ('unused', 'H'),
            ('length', 'I'),
            ('language', 'I'),
            ('n_groups', 'I')])

        def __init__(self, content, offset, glyphs):
            """Read the subtable that starts at *offset* in *content*.

            Parameters
            ----------
            content
                Bytes of the whole cmap table.
            offset
                Byte offset of this subtable within *content*.
            glyphs
                Container of glyph indices to keep (queried with ``in``).

            Raises
            ------
            ValueError
                If the subtable format is not one of the known formats.
            """
            self.offset = offset

            fmt, = struct.unpack('>H', content[offset:offset+2])
            if fmt in (0, 2, 4, 6):
                # uint16 format, uint16 length.
                self.length, = struct.unpack(
                    '>H', content[offset+2:offset+4])
            elif fmt in (8, 10, 12, 13):
                # uint16 format, uint16 reserved, uint32 length.
                self.length, = struct.unpack(
                    '>I', content[offset+4:offset+8])
            elif fmt == 14:
                # Format 14 has no reserved field: the uint32 length
                # follows the format number directly.
                self.length, = struct.unpack(
                    '>I', content[offset+2:offset+6])
            else:
                raise ValueError("Unknown cmap table type")

            self.format = fmt
            self.content = content[self.offset:self.offset+self.length]

            if fmt == 12:
                self._subset_format12(glyphs)

        def _subset_format12(self, glyph_set):
            """Rebuild a format 12 subtable with only the kept glyphs.

            Every character whose glyph is in *glyph_set* is retained and
            the survivors are regrouped into sequential map groups.  If
            no character survives, the subtable is left unchanged.
            """
            content = self.content
            header = self.format_12_struct.unpack(content[:16])

            # Expand the groups into (character code, glyph index) pairs
            # for the glyphs being kept.
            chars = []
            for i in range(header['n_groups']):
                start_char, end_char, start_glyph = struct.unpack(
                    '>III', content[16+(i*12):28+(i*12)])

                for ccode in range(start_char, end_char + 1):
                    gind = start_glyph + (ccode - start_char)
                    if gind in glyph_set:
                        chars.append((ccode, gind))

            if not chars:
                return

            # Each group is [start char, end char, start glyph].  A pair
            # extends the current group only when its character code is
            # the very next one AND it continues the same glyph run;
            # otherwise the merged range would also map the skipped
            # characters, whose glyphs were dropped or never mapped.
            new_groups = []
            for ccode, gind in chars:
                if new_groups:
                    group = new_groups[-1]
                    if (ccode == group[1] + 1 and
                            gind - ccode == group[2] - group[0]):
                        group[1] = ccode
                        continue
                new_groups.append([ccode, ccode, gind])

            new_content = [
                self.format_12_struct.pack(
                    format=12, unused=0, length=16 + 12 * len(new_groups),
                    language=header['language'], n_groups=len(new_groups))]

            for start, end, gind in new_groups:
                new_content.append(struct.pack('>III', start, end, gind))

            self.content = b''.join(new_content)
            self.length = len(self.content)

    def is_unicode_table(self, header):
        """Return True if the encoding record *header* denotes Unicode.

        *header* is an unpacked ``cmap_subtable_struct`` mapping; only
        its ``platformID`` and ``encodingID`` entries are examined.
        """
        platform = header['platformID']
        if platform == TT_PLATFORM.APPLE_UNICODE:
            return True
        if platform == TT_PLATFORM.ISO:
            return header['encodingID'] == TT_ISO_ID.ISO_10646
        if platform == TT_PLATFORM.MICROSOFT:
            return header['encodingID'] in (
                TT_MS_ID.UNICODE_CS, TT_MS_ID.UCS_4)
        return False

    def subset(self, glyph_set):
        """Drop every non-Unicode subtable and rewrite the table.

        All Unicode subtables are kept; format 12 ones are additionally
        reduced to the characters whose glyphs are in *glyph_set*.  The
        table is left untouched when it has no Unicode subtable or when
        a Unicode subtable has an unknown format.
        """
        content = self.content

        header = self.cmap_table_struct.unpack(content[:4])

        tables = {}          # original offset -> subtable (shared records)
        ordered_tables = []  # unique subtables, in first-seen order
        entries = []         # (encoding record, subtable) pairs to emit
        for table_num in range(header['numTables']):
            i = 4 + table_num * 8
            subheader = self.cmap_subtable_struct.unpack(content[i:i+8])
            if not self.is_unicode_table(subheader):
                continue

            # Several encoding records may point at the same subtable;
            # parse each distinct offset only once.
            table = tables.get(subheader['offset'])
            if table is None:
                try:
                    table = self._CmapSubtable(
                        content, subheader['offset'], glyph_set)
                except ValueError:
                    # If unknown cmap table types, just abort on
                    # subsetting
                    return
                tables[subheader['offset']] = table
                ordered_tables.append(table)
            entries.append((subheader, table))

        # If we don't have a Unicode table, just leave everything intact
        if not entries:
            return

        # Lay the kept subtables out right after the encoding records.
        offset = 4 + len(entries) * 8
        for table in ordered_tables:
            table.offset = offset
            offset += table.length

        new_content = [
            self.cmap_table_struct.pack(
                version=header['version'],
                numTables=len(entries))]

        for subheader, table in entries:
            new_content.append(self.cmap_subtable_struct.pack(
                platformID=subheader['platformID'],
                encodingID=subheader['encodingID'],
                offset=table.offset))

        for table in ordered_tables:
            new_content.append(table.content)

        self.content = b''.join(new_content)
572+
573+
382574
SPECIAL_TABLES = {
575+
b'cmap': _CmapTable,
383576
b'head': _HeadTable,
577+
b'hhea': _HheaTable,
578+
b'hmtx': _HmtxTable,
384579
b'loca': _LocaTable,
385580
b'glyf': _GlyfTable,
386581
b'post': _PostTable
@@ -450,12 +645,19 @@ def subset(self, ccodes):
450645
offsets = self[b'loca'].get_offsets(self)
451646
# Find all glyphs used, including components of compound
452647
# glyphs
453-
glyphs = self[b'glyf'].find_all_glyphs(glyphs, offsets)
648+
glyph_set = self[b'glyf'].find_all_glyphs(glyphs, offsets)
649+
650+
glyphs = list(glyph_set)
651+
glyphs.sort()
454652

455653
self[b'glyf'].subset(glyphs, offsets)
456654
self[b'loca'].subset(self, glyphs, offsets)
457655
if b'post' in self._tables:
458656
self[b'post'].subset(glyphs)
657+
if b'hmtx' in self._tables and b'hhea' in self._tables:
658+
self[b'hmtx'].subset(glyph_set, offsets, self[b'hhea'])
659+
if b'cmap' in self._tables:
660+
self[b'cmap'].subset(glyph_set)
459661

460662
def write(self, fd):
461663
self._header['numTables'] = len(self._tables)

0 commit comments

Comments
 (0)