Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More subsetting fixes #17

Merged
merged 4 commits into from
Sep 20, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ OpenType fonts.

.. autosummary::
:toctree: _generated
:template: autosummary/module.rst
:template: autosummary/base.rst

subset
subset.subset_font

Glyph
-----
Expand Down Expand Up @@ -122,6 +122,7 @@ TrueType information
TT_Postscript
TT_PLATFORM
TT_APPLE_ID
TT_ISO_ID
TT_MAC_ID
TT_MAC_LANGID
TT_MS_ID
Expand Down
216 changes: 209 additions & 7 deletions lib/freetypy/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
import struct


from freetypy import Face
from freetypy import Face, TT_PLATFORM, TT_ISO_ID, TT_MS_ID


UNDERSTOOD_VERSIONS = (0x00010000, 0x4f54544f)
Expand Down Expand Up @@ -307,8 +307,6 @@ def calculate_skip(flags):

i += calculate_skip(flags)

all_glyphs = list(all_glyphs)
all_glyphs.sort()
return all_glyphs

def subset(self, glyphs, offsets):
Expand Down Expand Up @@ -349,7 +347,7 @@ def _subset_format2(self, glyphs):
name_index < numglyphs - N_BASIC_NAMES):
needed_indices[name_index - N_BASIC_NAMES] = gind

names = []
names = [b'.removed']
name_index = 0
i += 2 * numglyphs
while i < len(content):
Expand All @@ -364,8 +362,8 @@ def _subset_format2(self, glyphs):
name_index += 1

new_content = [content[0:36]]
for i in range(numglyphs):
val = new_glyph_index.get(i, 0)
for i in range(1, numglyphs):
val = new_glyph_index.get(i, N_BASIC_NAMES)
new_content.append(struct.pack('>H', val))

for name in names:
Expand All @@ -379,8 +377,205 @@ def subset(self, glyphs):
self.content = self._subset_format2(glyphs)


class _HheaTable(_Table):
    """Table wrapper for 'hhea' that exposes its header fields as attributes.

    The raw table content is decoded with ``hhea_table_struct`` at
    construction time and each decoded field is stored on the instance
    under the field name declared below (for example
    ``numOfLongHorMetrics``).
    """

    # Field name / struct format code pairs, in on-disk order.
    hhea_table_struct = _BinaryStruct([
        ('version', 'I'),
        # Vertical extents and line spacing.
        ('ascent', 'h'),
        ('descent', 'h'),
        ('lineGap', 'h'),
        # Horizontal extents.
        ('advanceWidthMax', 'H'),
        ('minLeftSideBearing', 'h'),
        ('minRightSideBearing', 'h'),
        ('xMaxExtent', 'h'),
        # Caret description.
        ('caretSlopeRise', 'h'),
        ('caretSlopeRun', 'h'),
        ('caretOffset', 'h'),
        # Four reserved slots.
        ('res0', 'h'),
        ('res1', 'h'),
        ('res2', 'h'),
        ('res3', 'h'),
        ('metricDataFormat', 'h'),
        ('numOfLongHorMetrics', 'H'),
    ])

    def __init__(self, header, content):
        """Initialise the base table, then decode *content* into attributes.

        :param header: table directory entry, passed through to ``_Table``.
        :param content: raw bytes of the table.
        """
        super(_HheaTable, self).__init__(header, content)

        decoded = self.hhea_table_struct.unpack(content)
        # Publish every decoded field directly as an instance attribute.
        vars(self).update(decoded)


class _HmtxTable(_Table):
    """Table wrapper for 'hmtx' that can blank the metrics of unused glyphs."""

    def subset(self, glyph_set, offsets, hhea):
        """Zero out the metrics of every glyph that is not in *glyph_set*.

        In keeping with not changing glyph ids, entries are never
        removed; unused entries are overwritten with zero bytes, which
        should aid in compression.  ``self.content`` is replaced with
        the rewritten table.

        :param glyph_set: container of glyph indices to keep (only
            membership tests and iteration are used).
        :param offsets: glyph offsets; the glyph count is taken as
            ``len(offsets) - 1``.
        :param hhea: object providing ``numOfLongHorMetrics``.
        """
        n_glyphs = len(offsets) - 1
        n_long_hor_metrics = hhea.numOfLongHorMetrics
        content = self.content

        # Glyphs stored after the long metrics carry only a left side
        # bearing and inherit the advance width of the *last* long
        # metric.  If any of those glyphs is kept, that advance width
        # must survive even when the last long-metric glyph itself is
        # unused, otherwise the kept glyphs would get a zero advance.
        last_long = n_long_hor_metrics - 1
        trailing_glyph_kept = any(
            n_long_hor_metrics <= gind < n_glyphs for gind in glyph_set)

        h_metrics = content[:n_long_hor_metrics * 4]
        new_values = []
        for i in range(n_long_hor_metrics):
            entry = h_metrics[i * 4:i * 4 + 4]
            if i in glyph_set:
                new_values.append(entry)
            elif i == last_long and trailing_glyph_kept:
                # Keep the shared advance width, blank only the bearing.
                new_values.append(entry[:2] + b'\0\0')
            else:
                new_values.append(b'\0\0\0\0')

        left_side_bearing = content[n_long_hor_metrics * 4:]
        for i in range(n_glyphs - n_long_hor_metrics):
            if i + n_long_hor_metrics in glyph_set:
                new_values.append(left_side_bearing[i * 2:i * 2 + 2])
            else:
                new_values.append(b'\0\0')

        self.content = b''.join(new_values)


class _CmapTable(_Table):
    """Table wrapper for 'cmap' that keeps only the Unicode subtables.

    Format 12 subtables are additionally rewritten so that they only map
    characters whose glyph is retained; subtables of other formats are
    copied through unchanged.
    """

    cmap_table_struct = _BinaryStruct([
        ('version', 'H'),
        ('numTables', 'H')])

    cmap_subtable_struct = _BinaryStruct([
        ('platformID', 'H'),
        ('encodingID', 'H'),
        ('offset', 'I')])

    class _CmapSubtable(object):
        """A single cmap subtable sliced out of the parent table content.

        Attributes set by the constructor: ``offset``, ``length``,
        ``format`` and ``content`` (the raw bytes of the subtable).
        """

        format_12_struct = _BinaryStruct([
            ('format', 'H'),
            ('unused', 'H'),
            ('length', 'I'),
            ('language', 'I'),
            ('n_groups', 'I')])

        def __init__(self, content, offset, glyphs):
            """Slice the subtable at *offset* and subset it if format 12.

            :param content: raw bytes of the whole cmap table.
            :param offset: byte offset of this subtable within *content*.
            :param glyphs: container of glyph indices to keep.
            :raises ValueError: if the subtable format is not recognised.
            """
            self.offset = offset

            table_format, = struct.unpack('>H', content[offset:offset+2])
            if table_format in (0, 2, 4, 6):
                # uint16 format, uint16 length
                self.length, = struct.unpack(
                    '>H', content[offset+2:offset+4])
            elif table_format in (8, 10, 12, 13):
                # uint16 format, uint16 reserved, uint32 length
                self.length, = struct.unpack(
                    '>I', content[offset+4:offset+8])
            elif table_format == 14:
                # Format 14 has no reserved field: uint16 format is
                # immediately followed by the uint32 length.
                self.length, = struct.unpack(
                    '>I', content[offset+2:offset+6])
            else:
                raise ValueError("Unknown cmap table type")

            self.format = table_format
            self.content = content[self.offset:self.offset+self.length]

            if table_format == 12:
                self._subset_format12(glyphs)

        def _subset_format12(self, glyph_set):
            """Rewrite this format 12 subtable to map only kept glyphs.

            If no character maps to a glyph in *glyph_set* the subtable
            is left untouched.
            """
            content = self.content
            header = self.format_12_struct.unpack(content[:16])

            # Collect every (character code, glyph index) pair whose
            # glyph is retained, in the order the groups list them.
            chars = []
            for i in range(header['n_groups']):
                start_char, end_char, start_glyph = struct.unpack(
                    '>III', content[16+(i*12):28+(i*12)])

                for ccode in range(start_char, end_char + 1):
                    gind = start_glyph + (ccode - start_char)
                    if gind in glyph_set:
                        chars.append((ccode, gind))

            if not chars:
                return

            # Rebuild groups as [start_char, end_char, start_glyph].  A
            # pair may only extend the current group when both its
            # character code and its glyph index directly follow the
            # previous pair; merging on equal (glyph - char) delta alone
            # would make the group span characters that were dropped or
            # never mapped.
            new_groups = []
            last_ccode = last_gind = None
            for ccode, gind in chars:
                if (new_groups and
                        ccode == last_ccode + 1 and
                        gind == last_gind + 1):
                    new_groups[-1][1] = ccode
                else:
                    new_groups.append([ccode, ccode, gind])
                last_ccode = ccode
                last_gind = gind

            new_content = [
                self.format_12_struct.pack(
                    format=12, unused=0, length=16 + 12 * len(new_groups),
                    language=header['language'], n_groups=len(new_groups))]

            for start, end, gind in new_groups:
                new_content.append(struct.pack('>III', start, end, gind))

            self.content = b''.join(new_content)
            self.length = len(self.content)

    def is_unicode_table(self, header):
        """Return True if the encoding record *header* denotes Unicode.

        :param header: mapping with ``platformID`` and ``encodingID``.
        """
        if header['platformID'] == TT_PLATFORM.APPLE_UNICODE:
            return True
        elif (header['platformID'] == TT_PLATFORM.ISO and
              header['encodingID'] == TT_ISO_ID.ISO_10646):
            return True
        elif (header['platformID'] == TT_PLATFORM.MICROSOFT and
              header['encodingID'] in (TT_MS_ID.UNICODE_CS, TT_MS_ID.UCS_4)):
            return True
        return False

    def subset(self, glyph_set):
        """Drop every non-Unicode subtable and subset the remaining ones.

        ``self.content`` is replaced with the rebuilt table.  The table
        is left intact when it has no Unicode subtable or when a Unicode
        subtable uses an unrecognised format.

        :param glyph_set: container of glyph indices to keep.
        """
        content = self.content

        header = self.cmap_table_struct.unpack(content[:4])

        i = 4
        # Subtables keyed by their original offset, so that encoding
        # records sharing one subtable keep sharing it.
        tables = {}
        entries = []
        for table_num in range(header['numTables']):
            subheader = self.cmap_subtable_struct.unpack(content[i:i+8])
            if self.is_unicode_table(subheader):
                if subheader['offset'] in tables:
                    table = tables[subheader['offset']]
                else:
                    try:
                        table = self._CmapSubtable(
                            content, subheader['offset'], glyph_set)
                    except ValueError:
                        # If unknown cmap table types, just abort on
                        # subsetting
                        return
                    tables[subheader['offset']] = table
                entries.append((subheader, table))
            i += 8

        # If we don't have a Unicode table, just leave everything intact
        if not entries:
            return

        # Lay the kept subtables out right after the new directory.
        tables = list(tables.values())
        offset = 4 + len(entries) * 8
        for table in tables:
            table.offset = offset
            offset += table.length

        new_content = [
            self.cmap_table_struct.pack(
                version=header['version'],
                numTables=len(entries))]

        for subheader, table in entries:
            new_content.append(self.cmap_subtable_struct.pack(
                platformID=subheader['platformID'],
                encodingID=subheader['encodingID'],
                offset=table.offset))

        for table in tables:
            new_content.append(table.content)

        self.content = b''.join(new_content)


SPECIAL_TABLES = {
b'cmap': _CmapTable,
b'head': _HeadTable,
b'hhea': _HheaTable,
b'hmtx': _HmtxTable,
b'loca': _LocaTable,
b'glyf': _GlyfTable,
b'post': _PostTable
Expand Down Expand Up @@ -450,12 +645,19 @@ def subset(self, ccodes):
offsets = self[b'loca'].get_offsets(self)
# Find all glyphs used, including components of compound
# glyphs
glyphs = self[b'glyf'].find_all_glyphs(glyphs, offsets)
glyph_set = self[b'glyf'].find_all_glyphs(glyphs, offsets)

glyphs = list(glyph_set)
glyphs.sort()

self[b'glyf'].subset(glyphs, offsets)
self[b'loca'].subset(self, glyphs, offsets)
if b'post' in self._tables:
self[b'post'].subset(glyphs)
if b'hmtx' in self._tables and b'hhea' in self._tables:
self[b'hmtx'].subset(glyph_set, offsets, self[b'hhea'])
if b'cmap' in self._tables:
self[b'cmap'].subset(glyph_set)

def write(self, fd):
self._header['numTables'] = len(self._tables)
Expand Down