Skip to content

Commit b64e656

Browse files
committed
Improve and fix subsetting
1 parent 0384771 commit b64e656

File tree

1 file changed

+150
-12
lines changed

1 file changed

+150
-12
lines changed

lib/freetypy/subset.py

Lines changed: 150 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -212,11 +212,11 @@ def get_offsets(self, fontfile):
212212
entry_format, entry_size, scale = self._get_formats(fontfile)
213213

214214
content = self._content
215-
offsets = []
216-
for i in range(0, len(content), entry_size):
217-
value = struct.unpack(
215+
offsets = [
216+
struct.unpack(
218217
entry_format, content[i:i+entry_size])[0] * scale
219-
offsets.append(value)
218+
for i in range(0, len(content), entry_size)
219+
]
220220

221221
return offsets
222222

@@ -239,18 +239,140 @@ def subset(self, fontfile, glyphs, offsets):
239239

240240

241241
class _GlyfTable(_Table):
    """
    The ``glyf`` table: the outline data of every glyph, stored back to
    back and addressed through the offsets read from ``loca``.
    """

    def find_all_glyphs(self, glyphs, offsets):
        """
        Given a set of glyphs, find all glyphs, including the targets of
        compound glyphs, that are needed to render the glyphs.

        Parameters
        ----------
        glyphs : iterable of int
            The glyph indices that were explicitly requested.

        offsets : sequence of int
            Byte offsets into this table's content; glyph ``g`` occupies
            ``content[offsets[g]:offsets[g + 1]]``.

        Returns
        -------
        all_glyphs : list of int
            The requested glyphs plus every glyph reachable through
            compound-glyph components, sorted and without duplicates.
        """
        ARG_1_AND_2_ARE_WORDS = 1 << 0
        WE_HAVE_A_SCALE = 1 << 3
        MORE_COMPONENTS = 1 << 5
        WE_HAVE_AN_X_AND_Y_SCALE = 1 << 6
        WE_HAVE_A_TWO_BY_TWO = 1 << 7

        content = self.content

        all_glyphs = set()
        # Visiting order does not matter (the result is a sorted set), so
        # a stack is used: list.pop() is O(1) whereas list.pop(0) is O(n).
        pending = list(glyphs)

        while pending:
            gind = pending.pop()
            if gind in all_glyphs:
                continue
            all_glyphs.add(gind)

            glyph = content[offsets[gind]:offsets[gind + 1]]
            if not glyph:
                # Empty glyph (no outline): nothing to inspect.
                continue

            num_contours, = struct.unpack('>h', glyph[0:2])
            if num_contours >= 0:
                # Simple glyph: references no other glyphs.
                continue

            # Compound glyph: skip over the 10-byte glyph header, then
            # walk the component records.
            i = 10
            while True:
                flags, component_gind = struct.unpack('>HH', glyph[i:i + 4])
                pending.append(component_gind)

                if not flags & MORE_COMPONENTS:
                    break

                # The rest is only to calculate how many bytes to skip
                # to reach the next component record.
                #
                # The two placement arguments are shorts or bytes
                # depending on the flag bit...
                if flags & ARG_1_AND_2_ARE_WORDS:
                    i += 4 + 2 * 2
                else:
                    i += 4 + 2 * 1

                # ...but the optional transform is always made of
                # 2-byte F2Dot14 values, whatever the argument size.
                if flags & WE_HAVE_A_SCALE:
                    i += 2
                elif flags & WE_HAVE_AN_X_AND_Y_SCALE:
                    i += 2 * 2
                elif flags & WE_HAVE_A_TWO_BY_TWO:
                    i += 2 * 4

        return sorted(all_glyphs)

    def subset(self, glyphs, offsets):
        """
        Replace the table content with only the data of ``glyphs``,
        concatenated in the order given.

        Parameters
        ----------
        glyphs : iterable of int
            The glyph indices to keep.

        offsets : sequence of int
            Byte offsets of each glyph in the current content; glyph
            ``g`` occupies ``content[offsets[g]:offsets[g + 1]]``.
        """
        content = self.content

        self.content = b''.join(
            content[offsets[gind]:offsets[gind + 1]]
            for gind in glyphs)
306+
307+
308+
class _PostTable(_Table):
    """
    The ``post`` table.  Only the glyph-name data of format 2
    (``0x20000``) tables is rewritten when subsetting; other formats
    are left untouched.
    """

    # Only the leading format field of the 32-byte header is interpreted;
    # the remaining 28 bytes are carried along unchanged.
    post_table_struct = _BinaryStruct([
        ('format', 'I'),
        ('unused', '28s')])

    def __init__(self, header, content):
        super(_PostTable, self).__init__(header, content)

        # Exposes the unpacked header fields (e.g. ``self.format``) as
        # instance attributes.
        self.__dict__.update(self.post_table_struct.unpack(content[:32]))

    def _subset_format2(self, glyphs):
        """
        Rewrite a format 2 table so that only the glyphs in ``glyphs``
        keep their name; every other glyph is pointed at name index 0.

        The number of glyph entries is left unchanged; only the custom
        name strings that are no longer referenced are dropped.

        Parameters
        ----------
        glyphs : iterable of int
            The glyph indices whose names must be preserved.
        """
        # Name indices below this refer to the standard, built-in names;
        # the rest index into the strings stored at the end of the table.
        n_basic_names = 258
        # 32-byte header followed by the 2-byte glyph count.
        index_start = 32 + 2

        content = self._content

        numglyphs, = struct.unpack('>H', content[32:index_start])

        new_glyph_index = {}
        needed_indices = {}
        for gind in glyphs:
            if gind >= numglyphs:
                continue
            offset = index_start + 2 * gind
            name_index, = struct.unpack('>H', content[offset:offset + 2])
            if name_index < n_basic_names:
                new_glyph_index[gind] = name_index
            else:
                # Custom name: resolved below while scanning the string
                # data.  An index with no matching string is never
                # resolved and so falls back to name index 0.
                needed_indices[name_index - n_basic_names] = gind

        # Walk the length-prefixed name strings, keeping only the ones
        # that are referenced and renumbering them consecutively.
        names = []
        name_index = 0
        pos = index_start + 2 * numglyphs
        while pos < len(content):
            name_length, = struct.unpack('>B', content[pos:pos + 1])
            pos += 1
            if name_index in needed_indices:
                new_glyph_index[needed_indices[name_index]] = (
                    len(names) + n_basic_names)
                names.append(content[pos:pos + name_length])
            pos += name_length
            name_index += 1

        # Header and glyph count are copied verbatim, followed by the new
        # name-index array and the surviving name strings.
        new_content = [content[0:index_start]]
        for gind in range(numglyphs):
            new_content.append(
                struct.pack('>H', new_glyph_index.get(gind, 0)))
        for name in names:
            new_content.append(struct.pack('>B', len(name)))
            new_content.append(name)

        self._content = b''.join(new_content)

    def subset(self, glyphs):
        """
        Reduce the glyph-name data to the given glyphs.

        Only format 2 (``0x20000``) tables are modified.

        Parameters
        ----------
        glyphs : iterable of int
            The glyph indices that are kept in the font.
        """
        if self.format == 0x20000:
            self._subset_format2(glyphs)
248369

249370

250371
# Maps a table tag to the class used to represent that table.  Tags that
# are not listed here are looked up with a fallback to the generic
# ``_Table`` class.
SPECIAL_TABLES = {
    b'head': _HeadTable,
    b'loca': _LocaTable,
    b'glyf': _GlyfTable,
    b'post': _PostTable,
}
255377

256378

@@ -279,7 +401,7 @@ def __hasitem__(self, tag):
279401
return tag in self._tables
280402

281403
@classmethod
282-
def read(cls, fd):
404+
def read(cls, fd, tables_to_remove=[]):
283405
header = cls.header_struct.read(fd)
284406

285407
if header['version'] not in UNDERSTOOD_VERSIONS:
@@ -293,6 +415,8 @@ def read(cls, fd):
293415
for table_header in table_dir:
294416
fd.seek(table_header['offset'])
295417
content = fd.read(table_header['length'])
418+
if table_header['tag'] in tables_to_remove:
419+
continue
296420
table_cls = SPECIAL_TABLES.get(table_header['tag'], _Table)
297421
tables[table_header['tag']] = table_cls(table_header, content)
298422

@@ -311,11 +435,16 @@ def subset(self, ccodes):
311435
glyphs = [0]
312436
for ccode in ccodes:
313437
glyphs.append(self._face.get_char_index_unicode(ccode))
314-
glyphs.sort()
315438

316439
offsets = self[b'loca'].get_offsets(self)
440+
# Find all glyphs used, including components of compound
441+
# glyphs
442+
glyphs = self[b'glyf'].find_all_glyphs(glyphs, offsets)
443+
317444
self[b'glyf'].subset(glyphs, offsets)
318445
self[b'loca'].subset(self, glyphs, offsets)
446+
if b'post' in self._tables:
447+
self[b'post'].subset(glyphs)
319448

320449
def write(self, fd):
321450
self.header_struct.write(fd, self._header)
@@ -334,7 +463,7 @@ def write(self, fd):
334463
fd.write(table._content)
335464

336465

337-
def subset_font(input_fd, output_fd, charcodes):
466+
def subset_font(input_fd, output_fd, charcodes, tables_to_remove=None):
338467
"""
339468
Subset a SFNT-style (TrueType or OpenType) font.
340469
@@ -350,7 +479,16 @@ def subset_font(input_fd, output_fd, charcodes):
350479
351480
charcodes : list of int or unicode string
352481
The character codes to include in the output font file.
482+
483+
tables_to_remove : list of bytes, optional
484+
The tags of tables to remove completely. If not provided,
485+
this defaults to:
486+
487+
[b'GPOS', b'GSUB']
353488
"""
354-
fontfile = _FontFile.read(input_fd)
489+
if tables_to_remove is None:
490+
tables_to_remove = [b'GPOS', b'GSUB']
491+
492+
fontfile = _FontFile.read(input_fd, tables_to_remove)
355493
fontfile.subset(charcodes)
356494
fontfile.write(output_fd)

0 commit comments

Comments
 (0)