38
38
from __future__ import absolute_import , division , unicode_literals , print_function
39
39
40
40
41
+ # The general approach here is to not change any glyph ids, merely to
42
+ # remove content for unused glyphs. This means the character map
43
+ # tables don't have to be rewritten. Additionally, this doesn't break
44
+ # random third-party table formats that use glyph ids. This does mean
45
+ # that some space savings are left on the table, but for large Unicode
46
+ # fonts, the glyph data itself comprises the majority of the file
47
+ # size, and this approach tackles that handily.
48
+
49
+
41
50
__all__ = ['subset_font' ]
42
51
43
52
@@ -211,12 +220,12 @@ def _get_formats(self, fontfile):
211
220
def get_offsets(self, fontfile):
    """
    Decode the table content into a list of offsets.

    The entry layout is obtained from ``self._get_formats(fontfile)``,
    which yields the ``struct`` format of one entry, the size of one
    entry in bytes, and the factor each stored value is multiplied by.

    Parameters
    ----------
    fontfile : object
        Passed through unchanged to ``self._get_formats``.

    Returns
    -------
    offsets : list of int
        One scaled value per ``entry_size``-byte slice of the content,
        in file order.
    """
    entry_format, entry_size, scale = self._get_formats(fontfile)
    raw = self.content

    def decode_entry(start):
        # Only the first unpacked field of each entry is an offset.
        fields = struct.unpack(entry_format, raw[start:start + entry_size])
        return fields[0] * scale

    entry_starts = range(0, len(raw), entry_size)
    return [decode_entry(start) for start in entry_starts]
222
231
@@ -232,25 +241,149 @@ def subset(self, fontfile, glyphs, offsets):
232
241
new_offsets .append (offset )
233
242
234
243
entry_format , entry_size , scale = self ._get_formats (fontfile )
235
- new_content = []
236
- for value in new_offsets :
237
- new_content .append (struct .pack (entry_format , value // scale ))
238
- self .content = b'' .join (new_content )
244
+ self .content = b'' .join (
245
+ struct .pack (entry_format , value // scale )
246
+ for value in new_offsets )
239
247
240
248
241
249
class _GlyfTable(_Table):
    """
    The ``glyf`` table: the outline data of the glyphs, addressed by the
    byte offsets decoded from the ``loca`` table.
    """

    @staticmethod
    def _component_record_size(flags):
        """
        Return the size in bytes of one compound-glyph component record.

        A record is the flags word and the component glyph index (2 bytes
        each), two arguments, and an optional transformation.

        Parameters
        ----------
        flags : int
            The 16-bit flags word of the component record.

        Returns
        -------
        nbytes : int
            Number of bytes from the start of this record to the start
            of the next one.
        """
        ARG_1_AND_2_ARE_WORDS = 1 << 0
        WE_HAVE_A_SCALE = 1 << 3
        WE_HAVE_AN_X_AND_Y_SCALE = 1 << 6
        WE_HAVE_A_TWO_BY_TWO = 1 << 7

        # Size of one F2DOT14 transformation value.  This is fixed by the
        # format and is independent of the argument size selected below.
        F2DOT14_SIZE = 2

        # The two arguments are stored as bytes or as 16-bit words,
        # depending on a flag bit.
        arg_size = 2 if flags & ARG_1_AND_2_ARE_WORDS else 1

        nbytes = 4 + 2 * arg_size
        if flags & WE_HAVE_A_SCALE:
            nbytes += F2DOT14_SIZE
        elif flags & WE_HAVE_AN_X_AND_Y_SCALE:
            nbytes += 2 * F2DOT14_SIZE
        elif flags & WE_HAVE_A_TWO_BY_TWO:
            nbytes += 4 * F2DOT14_SIZE

        return nbytes

    def find_all_glyphs(self, glyphs, offsets):
        """
        Given a set of glyphs, find all glyphs, including the targets of
        compound glyphs, that are needed to render the glyphs.

        Parameters
        ----------
        glyphs : iterable of int
            The glyph ids that are requested directly.

        offsets : sequence of int
            Byte offsets into the table content; glyph ``g`` occupies
            ``content[offsets[g]:offsets[g + 1]]``.  Presumably the
            result of the ``loca`` table's ``get_offsets`` -- confirm
            against the caller.

        Returns
        -------
        all_glyphs : list of int
            The sorted, de-duplicated ids of the requested glyphs and of
            every glyph they reference, directly or indirectly.

        Raises
        ------
        IndexError
            If a glyph id has no entry in `offsets`.
        struct.error
            If a glyph record is too short to hold the fields read here.
        """
        MORE_COMPONENTS = 1 << 5

        # numberOfContours (2 bytes) followed by the bounding box
        # (4 values of 2 bytes each).
        GLYPH_HEADER_SIZE = 10

        content = self.content
        found = set()
        # The result is sorted before returning, so the traversal order
        # is irrelevant; popping from the end keeps each step O(1).
        pending = list(glyphs)

        while pending:
            gind = pending.pop()
            if gind in found:
                continue
            found.add(gind)

            glyph = content[offsets[gind]:offsets[gind + 1]]
            if not glyph:
                # A glyph without outline data references nothing.
                continue

            num_contours, = struct.unpack_from('>h', glyph, 0)
            if num_contours >= 0:
                # A simple glyph references no other glyphs.
                continue

            # Compound glyph: walk its component records.
            pos = GLYPH_HEADER_SIZE
            while True:
                flags, component_gind = struct.unpack_from('>HH', glyph, pos)
                pending.append(component_gind)

                if not flags & MORE_COMPONENTS:
                    break

                pos += self._component_record_size(flags)

        return sorted(found)

    def subset(self, glyphs, offsets):
        """
        Replace the table content with the data of `glyphs` only.

        Parameters
        ----------
        glyphs : iterable of int
            The glyph ids to keep; their data is concatenated in the
            order given.

        offsets : sequence of int
            Byte offsets into the current content; glyph ``g`` occupies
            ``content[offsets[g]:offsets[g + 1]]``.
        """
        content = self.content

        self.content = b''.join(
            content[offsets[gind]:offsets[gind + 1]]
            for gind in glyphs)
320
+
321
+
322
class _PostTable(_Table):
    """
    The ``post`` table, which can carry the PostScript names of the
    glyphs.
    """
    post_table_struct = _BinaryStruct([
        ('format', 'I')])

    # Rewriting of format 2.0 glyph names is deliberately switched off,
    # so `subset` currently leaves the table untouched.
    # NOTE(review): the reason it was disabled is not recorded in this
    # file -- confirm before enabling.
    _SUBSET_FORMAT2_NAMES = False

    def __init__(self, header, content):
        """
        Parameters
        ----------
        header : object
            The table directory entry, passed on to the base class.

        content : bytes
            The raw table data.  Its first 4 bytes are unpacked with
            ``post_table_struct`` and the resulting fields (``format``)
            are set as instance attributes.
        """
        super(_PostTable, self).__init__(header, content)

        self.__dict__.update(self.post_table_struct.unpack(content[:4]))

    def _subset_format2(self, glyphs):
        """
        Build format 2.0 table content that names only `glyphs`.

        Glyph ids are left unchanged: the name index array keeps one
        entry per glyph of the original table, entries of glyphs that
        are not in `glyphs` are set to 0, and only the name strings that
        are still referenced are written out.

        Parameters
        ----------
        glyphs : iterable of int
            The glyph ids whose names are kept.

        Returns
        -------
        content : bytes
            The rebuilt table data.
        """
        # Name indices below this value refer to the standard Macintosh
        # glyph names; higher ones refer to the strings stored at the
        # end of the table, counted from this value.
        N_BASIC_NAMES = 258
        # Size of the fixed part of the table that precedes numGlyphs.
        HEADER_SIZE = 32

        content = self.content

        pos = HEADER_SIZE
        numglyphs, = struct.unpack('>H', content[pos:pos + 2])
        pos += 2
        # Everything before this point is copied verbatim to the output.
        index_array_start = pos

        new_glyph_index = {}
        # Position in the string list -> ids of all glyphs using that
        # string (several glyphs may share one name).
        needed_strings = {}
        for gind in glyphs:
            if gind < numglyphs:
                offset = index_array_start + 2 * gind
                name_index, = struct.unpack(
                    '>H', content[offset:offset + 2])
                if name_index < N_BASIC_NAMES:
                    new_glyph_index[gind] = name_index
                else:
                    needed_strings.setdefault(
                        name_index - N_BASIC_NAMES, []).append(gind)

        # Walk the length-prefixed strings, keeping the referenced ones
        # and renumbering them in the order they are kept.
        names = []
        string_number = 0
        pos = index_array_start + 2 * numglyphs
        while pos < len(content):
            name_length, = struct.unpack('>B', content[pos:pos + 1])
            pos += 1
            if string_number in needed_strings:
                for gind in needed_strings[string_number]:
                    new_glyph_index[gind] = len(names) + N_BASIC_NAMES
                names.append(content[pos:pos + name_length])
            pos += name_length
            string_number += 1

        new_content = [content[:index_array_start]]
        new_content.extend(
            struct.pack('>H', new_glyph_index.get(gind, 0))
            for gind in range(numglyphs))

        for name in names:
            new_content.append(struct.pack('>B', len(name)))
            new_content.append(name)

        return b''.join(new_content)

    def subset(self, glyphs):
        """
        Subset the glyph names to `glyphs`.

        Only format 2.0 tables (``format == 0x20000``) are candidates,
        and even those are left unchanged while
        ``_SUBSET_FORMAT2_NAMES`` is false.

        Parameters
        ----------
        glyphs : iterable of int
            The glyph ids that remain in use.
        """
        if self.format == 0x20000 and self._SUBSET_FORMAT2_NAMES:
            self.content = self._subset_format2(glyphs)
248
380
249
381
250
382
# Table classes with dedicated handling, keyed by table tag.  Tags that
# are not listed here are looked up with a fallback to the plain
# `_Table` class when the font is read.
SPECIAL_TABLES = {
    b'head': _HeadTable,
    b'loca': _LocaTable,
    b'glyf': _GlyfTable,
    b'post': _PostTable,
}
255
388
256
389
@@ -279,7 +412,7 @@ def __hasitem__(self, tag):
279
412
return tag in self ._tables
280
413
281
414
@classmethod
282
- def read (cls , fd ):
415
+ def read (cls , fd , tables_to_remove = [] ):
283
416
header = cls .header_struct .read (fd )
284
417
285
418
if header ['version' ] not in UNDERSTOOD_VERSIONS :
@@ -293,6 +426,8 @@ def read(cls, fd):
293
426
for table_header in table_dir :
294
427
fd .seek (table_header ['offset' ])
295
428
content = fd .read (table_header ['length' ])
429
+ if table_header ['tag' ] in tables_to_remove :
430
+ continue
296
431
table_cls = SPECIAL_TABLES .get (table_header ['tag' ], _Table )
297
432
tables [table_header ['tag' ]] = table_cls (table_header , content )
298
433
@@ -311,13 +446,20 @@ def subset(self, ccodes):
311
446
glyphs = [0 ]
312
447
for ccode in ccodes :
313
448
glyphs .append (self ._face .get_char_index_unicode (ccode ))
314
- glyphs .sort ()
315
449
316
450
offsets = self [b'loca' ].get_offsets (self )
451
+ # Find all glyphs used, including components of compound
452
+ # glyphs
453
+ glyphs = self [b'glyf' ].find_all_glyphs (glyphs , offsets )
454
+
317
455
self [b'glyf' ].subset (glyphs , offsets )
318
456
self [b'loca' ].subset (self , glyphs , offsets )
457
+ if b'post' in self ._tables :
458
+ self [b'post' ].subset (glyphs )
319
459
320
460
def write (self , fd ):
461
+ self ._header ['numTables' ] = len (self ._tables )
462
+
321
463
self .header_struct .write (fd , self ._header )
322
464
323
465
offset = (self .header_struct .size +
@@ -334,7 +476,7 @@ def write(self, fd):
334
476
fd .write (table ._content )
335
477
336
478
337
- def subset_font (input_fd , output_fd , charcodes ):
479
+ def subset_font (input_fd , output_fd , charcodes , tables_to_remove = None ):
338
480
"""
339
481
Subset a SFNT-style (TrueType or OpenType) font.
340
482
@@ -350,7 +492,16 @@ def subset_font(input_fd, output_fd, charcodes):
350
492
351
493
charcodes : list of int or unicode string
352
494
The character codes to include in the output font file.
495
+
496
+ tables_to_remove : list of bytes, optional
497
+ The tags of tables to remove completely. If not provided,
498
+ this defaults to:
499
+
500
+ [b'GPOS', b'GSUB']
353
501
"""
354
- fontfile = _FontFile .read (input_fd )
502
+ if tables_to_remove is None :
503
+ tables_to_remove = [b'GPOS' , b'GSUB' ]
504
+
505
+ fontfile = _FontFile .read (input_fd , tables_to_remove )
355
506
fontfile .subset (charcodes )
356
507
fontfile .write (output_fd )
0 commit comments