Skip to content

Commit 2ffb1d7

Browse files
committed
Merge pull request #14 from mdboom/fix-subsetting
Fix subsetting
2 parents 0384771 + 3fab36e commit 2ffb1d7

File tree

3 files changed

+172
-21
lines changed

3 files changed

+172
-21
lines changed

lib/freetypy/subset.py

Lines changed: 168 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,15 @@
3838
from __future__ import absolute_import, division, unicode_literals, print_function
3939

4040

41+
# The general approach here is to not change any glyph ids, merely to
42+
# remove content for unused glyphs. This means the character map
43+
# tables don't have to be rewritten. Additionally, this doesn't break
44+
# random third-party table formats that use glyph ids. This does mean
45+
# that some space savings are left on the table, but for large Unicode
46+
# fonts, the glyph data itself comprises the majority of the file
47+
# size, and this approach tackles that handily.
48+
49+
4150
__all__ = ['subset_font']
4251

4352

@@ -211,12 +220,12 @@ def _get_formats(self, fontfile):
211220
def get_offsets(self, fontfile):
212221
entry_format, entry_size, scale = self._get_formats(fontfile)
213222

214-
content = self._content
215-
offsets = []
216-
for i in range(0, len(content), entry_size):
217-
value = struct.unpack(
223+
content = self.content
224+
offsets = [
225+
struct.unpack(
218226
entry_format, content[i:i+entry_size])[0] * scale
219-
offsets.append(value)
227+
for i in range(0, len(content), entry_size)
228+
]
220229

221230
return offsets
222231

@@ -232,25 +241,149 @@ def subset(self, fontfile, glyphs, offsets):
232241
new_offsets.append(offset)
233242

234243
entry_format, entry_size, scale = self._get_formats(fontfile)
235-
new_content = []
236-
for value in new_offsets:
237-
new_content.append(struct.pack(entry_format, value // scale))
238-
self.content = b''.join(new_content)
244+
self.content = b''.join(
245+
struct.pack(entry_format, value // scale)
246+
for value in new_offsets)
239247

240248

241249
class _GlyfTable(_Table):
250+
def find_all_glyphs(self, glyphs, offsets):
    """
    Given a set of glyphs, find all glyphs, including the targets of
    compound glyphs, that are needed to render the glyphs.

    Parameters
    ----------
    glyphs : iterable of int
        The glyph ids that are directly requested.  Not modified.

    offsets : sequence of int
        Byte offsets into this table's content.  Glyph ``gind``
        occupies ``content[offsets[gind]:offsets[gind + 1]]``, so the
        sequence needs one more entry than there are glyphs.

    Returns
    -------
    all_glyphs : list of int
        Sorted, duplicate-free glyph ids: the requested glyphs plus
        every glyph referenced (directly or transitively) as a
        component of a compound glyph.

    Raises
    ------
    IndexError
        If a glyph id has no entry in ``offsets``.
    struct.error
        If a glyph record is too short to hold the data being read.
    """
    ARG_1_AND_2_ARE_WORDS = 1 << 0
    WE_HAVE_A_SCALE = 1 << 3
    MORE_COMPONENTS = 1 << 5
    WE_HAVE_AN_X_AND_Y_SCALE = 1 << 6
    WE_HAVE_A_TWO_BY_TWO = 1 << 7

    def calculate_skip(flags):
        """
        Calculates the number of bytes to skip to get to the next
        component entry.
        """
        # flags (uint16) + glyph index (uint16)
        nbytes = 4

        # The two placement arguments are bytes or shorts, depending
        # on a flag bit.
        if flags & ARG_1_AND_2_ARE_WORDS:
            nbytes += 4
        else:
            nbytes += 2

        # The optional transform is made of 2-byte F2Dot14 values no
        # matter how wide the placement arguments are.
        if flags & WE_HAVE_A_SCALE:
            nbytes += 2
        elif flags & WE_HAVE_AN_X_AND_Y_SCALE:
            nbytes += 4
        elif flags & WE_HAVE_A_TWO_BY_TWO:
            nbytes += 8

        return nbytes

    content = self.content
    all_glyphs = set()
    # Work on a copy so the caller's sequence is left alone.  The visit
    # order is irrelevant (the result is a sorted set), so pop from the
    # end, which is O(1), instead of from the front.
    pending = list(glyphs)

    while pending:
        gind = pending.pop()
        if gind in all_glyphs:
            continue
        all_glyphs.add(gind)

        glyph = content[offsets[gind]:offsets[gind + 1]]
        if not glyph:
            # Empty glyph (e.g. a space): nothing to inspect
            continue

        num_contours, = struct.unpack_from('>h', glyph, 0)
        if num_contours >= 0:
            # Simple glyph: references no other glyphs
            continue

        # Compound glyph: skip over the 10-byte glyph header, then walk
        # the component records.
        i = 10
        while True:
            flags, component_gind = struct.unpack_from('>HH', glyph, i)
            pending.append(component_gind)

            if not flags & MORE_COMPONENTS:
                break

            i += calculate_skip(flags)

    return sorted(all_glyphs)
313+
242314
def subset(self, glyphs, offsets):
243315
content = self.content
244-
new_content = []
316+
317+
self.content = b''.join(
318+
content[offsets[gind]:offsets[gind+1]]
319+
for gind in glyphs)
320+
321+
322+
class _PostTable(_Table):
323+
post_table_struct = _BinaryStruct([
324+
('format', 'I')])
325+
326+
def __init__(self, header, content):
327+
super(_PostTable, self).__init__(header, content)
328+
329+
self.__dict__.update(self.post_table_struct.unpack(content[:4]))
330+
331+
def _subset_format2(self, glyphs):
332+
N_BASIC_NAMES = 258
333+
334+
content = self._content
335+
i = 32
336+
337+
numglyphs, = struct.unpack('>H', content[i:i+2])
338+
i += 2
339+
340+
new_glyph_index = {}
341+
needed_indices = {}
245342
for gind in glyphs:
246-
new_content.append(content[offsets[gind]:offsets[gind+1]])
247-
self.content = b''.join(new_content)
343+
if gind < numglyphs:
344+
offset = i + 2 * gind
345+
name_index, = struct.unpack('>H', content[offset:offset+2])
346+
if name_index < n_basic_names:
347+
new_glyph_index[gind] = name_index
348+
elif (name_index >= n_basic_names and
349+
name_index < numglyphs - n_basic_names):
350+
needed_indices[name_index - n_basic_names] = gind
351+
352+
names = []
353+
name_index = 0
354+
i += 2 * numglyphs
355+
while i < len(content):
356+
name_length, = struct.unpack('>B', content[i:i+1])
357+
i += 1
358+
if name_index in needed_indices:
359+
name = content[i:i+name_length]
360+
new_glyph_index[needed_indices[name_index]] = (
361+
len(names) + n_basic_names)
362+
names.append(name)
363+
i += name_length
364+
name_index += 1
365+
366+
new_content = [content[0:36]]
367+
for i in range(numglyphs):
368+
val = new_glyph_index.get(i, 0)
369+
new_content.append(struct.pack('>H', val))
370+
371+
for name in names:
372+
new_content.append(struct.pack('>B', len(name)))
373+
new_content.append(name)
374+
375+
return b''.join(new_content)
376+
377+
def subset(self, glyphs):
378+
if self.format == 0x20000 and False:
379+
self.content = self._subset_format2(glyphs)
248380

249381

250382
SPECIAL_TABLES = {
251383
b'head': _HeadTable,
252384
b'loca': _LocaTable,
253-
b'glyf': _GlyfTable
385+
b'glyf': _GlyfTable,
386+
b'post': _PostTable
254387
}
255388

256389

@@ -279,7 +412,7 @@ def __hasitem__(self, tag):
279412
return tag in self._tables
280413

281414
@classmethod
282-
def read(cls, fd):
415+
def read(cls, fd, tables_to_remove=[]):
283416
header = cls.header_struct.read(fd)
284417

285418
if header['version'] not in UNDERSTOOD_VERSIONS:
@@ -293,6 +426,8 @@ def read(cls, fd):
293426
for table_header in table_dir:
294427
fd.seek(table_header['offset'])
295428
content = fd.read(table_header['length'])
429+
if table_header['tag'] in tables_to_remove:
430+
continue
296431
table_cls = SPECIAL_TABLES.get(table_header['tag'], _Table)
297432
tables[table_header['tag']] = table_cls(table_header, content)
298433

@@ -311,13 +446,20 @@ def subset(self, ccodes):
311446
glyphs = [0]
312447
for ccode in ccodes:
313448
glyphs.append(self._face.get_char_index_unicode(ccode))
314-
glyphs.sort()
315449

316450
offsets = self[b'loca'].get_offsets(self)
451+
# Find all glyphs used, including components of compound
452+
# glyphs
453+
glyphs = self[b'glyf'].find_all_glyphs(glyphs, offsets)
454+
317455
self[b'glyf'].subset(glyphs, offsets)
318456
self[b'loca'].subset(self, glyphs, offsets)
457+
if b'post' in self._tables:
458+
self[b'post'].subset(glyphs)
319459

320460
def write(self, fd):
461+
self._header['numTables'] = len(self._tables)
462+
321463
self.header_struct.write(fd, self._header)
322464

323465
offset = (self.header_struct.size +
@@ -334,7 +476,7 @@ def write(self, fd):
334476
fd.write(table._content)
335477

336478

337-
def subset_font(input_fd, output_fd, charcodes):
479+
def subset_font(input_fd, output_fd, charcodes, tables_to_remove=None):
338480
"""
339481
Subset a SFNT-style (TrueType or OpenType) font.
340482
@@ -350,7 +492,16 @@ def subset_font(input_fd, output_fd, charcodes):
350492
351493
charcodes : list of int or unicode string
352494
The character codes to include in the output font file.
495+
496+
tables_to_remove : list of bytes, optional
497+
The tags of tables to remove completely. If not provided,
498+
this defaults to:
499+
500+
[b'GPOS', b'GSUB']
353501
"""
354-
fontfile = _FontFile.read(input_fd)
502+
if tables_to_remove is None:
503+
tables_to_remove = [b'GPOS', b'GSUB']
504+
505+
fontfile = _FontFile.read(input_fd, tables_to_remove)
355506
fontfile.subset(charcodes)
356507
fontfile.write(output_fd)

src/freetypy.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ either expressed or implied, of the FreeBSD Project.
6161

6262
#include "doc/lcd.h"
6363

64-
#include "freetype/ftlcdfil.h"
64+
#include FT_LCD_FILTER_H
6565

6666
static FT_Library ft_library;
6767

@@ -94,7 +94,7 @@ py_set_lcd_filter(PyObject *self, PyObject *args, PyObject *kwargs)
9494
PyObject *
9595
py_set_lcd_filter_weights(PyObject *self, PyObject *args, PyObject *kwargs)
9696
{
97-
char filters[5];
97+
unsigned char filters[5];
9898

9999
static char *kwlist[] = {"filter", NULL};
100100

src/lcd.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ either expressed or implied, of the FreeBSD Project.
3030
#include "lcd.h"
3131
#include "doc/lcd.h"
3232

33-
#include "freetype/ftlcdfil.h"
33+
#include FT_LCD_FILTER_H
3434

3535
ftpy_ConstantType Py_FT_LCD_FILTER_ConstantType;
3636
static PyTypeObject Py_FT_LCD_FILTER_Type;
@@ -46,7 +46,7 @@ static constant_def FT_LCD_FILTER_constants[] = {
4646

4747
int setup_Lcd(PyObject *m)
4848
{
49-
define_constant_namespace(
49+
return define_constant_namespace(
5050
m, &Py_FT_LCD_FILTER_Type, &Py_FT_LCD_FILTER_ConstantType,
5151
"freetypy.LCD_FILTER",
5252
doc_LCD_FILTER, FT_LCD_FILTER_constants);

0 commit comments

Comments
 (0)