Skip to content

Fix subsetting #14

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 29, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 168 additions & 17 deletions lib/freetypy/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,15 @@
from __future__ import absolute_import, division, unicode_literals, print_function


# The general approach here is to not change any glyph ids, merely to
# remove content for unused glyphs. This means the character map
# tables don't have to be rewritten. Additionally, this doesn't break
# random third-party table formats that use glyph ids. This does mean
# that some space savings are left on the table, but for large Unicode
# fonts, the glyph data itself comprises the majority of the file
# size, and this approach tackles that handily.


__all__ = ['subset_font']


Expand Down Expand Up @@ -211,12 +220,12 @@ def _get_formats(self, fontfile):
def get_offsets(self, fontfile):
    """
    Decode this table's content into a list of offsets.

    Parameters
    ----------
    fontfile : object
        Passed straight through to `_get_formats`, which supplies the
        struct format of a single entry, the size of an entry in
        bytes, and the factor each stored value is multiplied by.

    Returns
    -------
    offsets : list of int
        One value per entry in the table, each already multiplied by
        the scale factor.
    """
    entry_format, entry_size, scale = self._get_formats(fontfile)

    content = self.content
    offsets = [
        struct.unpack(
            entry_format, content[i:i+entry_size])[0] * scale
        for i in range(0, len(content), entry_size)
    ]

    return offsets

Expand All @@ -232,25 +241,149 @@ def subset(self, fontfile, glyphs, offsets):
new_offsets.append(offset)

entry_format, entry_size, scale = self._get_formats(fontfile)
new_content = []
for value in new_offsets:
new_content.append(struct.pack(entry_format, value // scale))
self.content = b''.join(new_content)
self.content = b''.join(
struct.pack(entry_format, value // scale)
for value in new_offsets)


class _GlyfTable(_Table):
def find_all_glyphs(self, glyphs, offsets):
    """
    Given a set of glyphs, find all glyphs, including the targets of
    compound glyphs, that are needed to render the glyphs.

    Parameters
    ----------
    glyphs : iterable of int
        The glyph ids to start from.

    offsets : list of int
        Byte offsets into this table's content.  Glyph ``g`` occupies
        ``content[offsets[g]:offsets[g+1]]``.

    Returns
    -------
    all_glyphs : list of int
        Sorted glyph ids: the starting glyphs plus every glyph that is
        referenced, directly or indirectly, as a component.

    Raises
    ------
    struct.error
        If a compound glyph record ends before its component list
        does.
    """
    ARG_1_AND_2_ARE_WORDS = 1 << 0
    WE_HAVE_A_SCALE = 1 << 3
    MORE_COMPONENTS = 1 << 5
    WE_HAVE_AN_X_AND_Y_SCALE = 1 << 6
    WE_HAVE_A_TWO_BY_TWO = 1 << 7

    def calculate_skip(flags):
        """
        Calculates the number of bytes to skip to get to the next
        component entry.
        """
        # 2 bytes of flags + 2 bytes of glyph index, followed by two
        # arguments that are bytes or shorts depending on a flag bit.
        if flags & ARG_1_AND_2_ARE_WORDS:
            nbytes = 4 + 2 * 2
        else:
            nbytes = 4 + 2 * 1

        # The transform values are F2Dot14 numbers: always 2 bytes
        # each, independent of the argument size above.
        if flags & WE_HAVE_A_SCALE:
            nbytes += 2
        elif flags & WE_HAVE_AN_X_AND_Y_SCALE:
            nbytes += 2 * 2
        elif flags & WE_HAVE_A_TWO_BY_TWO:
            nbytes += 2 * 4

        return nbytes

    content = self.content
    all_glyphs = set()
    # Visit order is irrelevant (the result is a sorted set), so pop
    # from the end of the list, which is O(1).
    pending = list(glyphs)

    while pending:
        gind = pending.pop()
        if gind in all_glyphs:
            continue
        all_glyphs.add(gind)

        glyph = content[offsets[gind]:offsets[gind+1]]
        if not glyph:
            # Glyph without outline data: nothing to follow.
            continue

        num_contours, = struct.unpack('>h', glyph[0:2])
        if num_contours >= 0:
            # Simple glyph: references no other glyphs.
            continue

        # Compound glyph: skip over the 10-byte glyph header and walk
        # the component records.
        i = 10
        while True:
            flags, component_gind = struct.unpack('>HH', glyph[i:i+4])
            pending.append(component_gind)

            if not flags & MORE_COMPONENTS:
                break

            i += calculate_skip(flags)

    return sorted(all_glyphs)

def subset(self, glyphs, offsets):
    """
    Replace this table's content with the data of `glyphs` only.

    Parameters
    ----------
    glyphs : iterable of int
        Glyph ids to keep, in the order their data should be written.

    offsets : list of int
        Byte offsets into the current content.  Glyph ``g`` occupies
        ``content[offsets[g]:offsets[g+1]]``.
    """
    content = self.content

    self.content = b''.join(
        content[offsets[gind]:offsets[gind+1]]
        for gind in glyphs)


class _PostTable(_Table):
post_table_struct = _BinaryStruct([
('format', 'I')])

def __init__(self, header, content):
    """
    Initialise the base table, then expose the fields decoded from the
    first four bytes of `content` as instance attributes.
    """
    super(_PostTable, self).__init__(header, content)

    leading_fields = self.post_table_struct.unpack(content[:4])
    self.__dict__.update(leading_fields)

def _subset_format2(self, glyphs):
N_BASIC_NAMES = 258

content = self._content
i = 32

numglyphs, = struct.unpack('>H', content[i:i+2])
i += 2

new_glyph_index = {}
needed_indices = {}
for gind in glyphs:
new_content.append(content[offsets[gind]:offsets[gind+1]])
self.content = b''.join(new_content)
if gind < numglyphs:
offset = i + 2 * gind
name_index, = struct.unpack('>H', content[offset:offset+2])
if name_index < n_basic_names:
new_glyph_index[gind] = name_index
elif (name_index >= n_basic_names and
name_index < numglyphs - n_basic_names):
needed_indices[name_index - n_basic_names] = gind

names = []
name_index = 0
i += 2 * numglyphs
while i < len(content):
name_length, = struct.unpack('>B', content[i:i+1])
i += 1
if name_index in needed_indices:
name = content[i:i+name_length]
new_glyph_index[needed_indices[name_index]] = (
len(names) + n_basic_names)
names.append(name)
i += name_length
name_index += 1

new_content = [content[0:36]]
for i in range(numglyphs):
val = new_glyph_index.get(i, 0)
new_content.append(struct.pack('>H', val))

for name in names:
new_content.append(struct.pack('>B', len(name)))
new_content.append(name)

return b''.join(new_content)

def subset(self, glyphs):
    """
    Subset the table for `glyphs`.

    As written this never changes the content: the format 2.0 rewrite
    is switched off by a constant-false condition.
    """
    # NOTE(review): the rewrite is deliberately disabled here; the
    # reason is not visible from this code -- confirm before enabling.
    rewrite_enabled = False
    if self.format == 0x20000 and rewrite_enabled:
        self.content = self._subset_format2(glyphs)


SPECIAL_TABLES = {
b'head': _HeadTable,
b'loca': _LocaTable,
b'glyf': _GlyfTable
b'glyf': _GlyfTable,
b'post': _PostTable
}


Expand Down Expand Up @@ -279,7 +412,7 @@ def __hasitem__(self, tag):
return tag in self._tables

@classmethod
def read(cls, fd):
def read(cls, fd, tables_to_remove=[]):
header = cls.header_struct.read(fd)

if header['version'] not in UNDERSTOOD_VERSIONS:
Expand All @@ -293,6 +426,8 @@ def read(cls, fd):
for table_header in table_dir:
fd.seek(table_header['offset'])
content = fd.read(table_header['length'])
if table_header['tag'] in tables_to_remove:
continue
table_cls = SPECIAL_TABLES.get(table_header['tag'], _Table)
tables[table_header['tag']] = table_cls(table_header, content)

Expand All @@ -311,13 +446,20 @@ def subset(self, ccodes):
glyphs = [0]
for ccode in ccodes:
glyphs.append(self._face.get_char_index_unicode(ccode))
glyphs.sort()

offsets = self[b'loca'].get_offsets(self)
# Find all glyphs used, including components of compound
# glyphs
glyphs = self[b'glyf'].find_all_glyphs(glyphs, offsets)

self[b'glyf'].subset(glyphs, offsets)
self[b'loca'].subset(self, glyphs, offsets)
if b'post' in self._tables:
self[b'post'].subset(glyphs)

def write(self, fd):
self._header['numTables'] = len(self._tables)

self.header_struct.write(fd, self._header)

offset = (self.header_struct.size +
Expand All @@ -334,7 +476,7 @@ def write(self, fd):
fd.write(table._content)


def subset_font(input_fd, output_fd, charcodes):
def subset_font(input_fd, output_fd, charcodes, tables_to_remove=None):
"""
Subset a SFNT-style (TrueType or OpenType) font.

Expand All @@ -350,7 +492,16 @@ def subset_font(input_fd, output_fd, charcodes):

charcodes : list of int or unicode string
The character codes to include in the output font file.

tables_to_remove : list of bytes, optional
The tags of tables to remove completely. If not provided,
this defaults to:

[b'GPOS', b'GSUB']
"""
fontfile = _FontFile.read(input_fd)
if tables_to_remove is None:
tables_to_remove = [b'GPOS', b'GSUB']

fontfile = _FontFile.read(input_fd, tables_to_remove)
fontfile.subset(charcodes)
fontfile.write(output_fd)
4 changes: 2 additions & 2 deletions src/freetypy.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ either expressed or implied, of the FreeBSD Project.

#include "doc/lcd.h"

#include "freetype/ftlcdfil.h"
#include FT_LCD_FILTER_H

static FT_Library ft_library;

Expand Down Expand Up @@ -94,7 +94,7 @@ py_set_lcd_filter(PyObject *self, PyObject *args, PyObject *kwargs)
PyObject *
py_set_lcd_filter_weights(PyObject *self, PyObject *args, PyObject *kwargs)
{
char filters[5];
unsigned char filters[5];

static char *kwlist[] = {"filter", NULL};

Expand Down
4 changes: 2 additions & 2 deletions src/lcd.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ either expressed or implied, of the FreeBSD Project.
#include "lcd.h"
#include "doc/lcd.h"

#include "freetype/ftlcdfil.h"
#include FT_LCD_FILTER_H

ftpy_ConstantType Py_FT_LCD_FILTER_ConstantType;
static PyTypeObject Py_FT_LCD_FILTER_Type;
Expand All @@ -46,7 +46,7 @@ static constant_def FT_LCD_FILTER_constants[] = {

int setup_Lcd(PyObject *m)
{
define_constant_namespace(
return define_constant_namespace(
m, &Py_FT_LCD_FILTER_Type, &Py_FT_LCD_FILTER_ConstantType,
"freetypy.LCD_FILTER",
doc_LCD_FILTER, FT_LCD_FILTER_constants);
Expand Down