Skip to content

Commit 4f80058

Browse files
committed
Auto-kerning
Switch from Tj to TJ operator. There's currently no option to turn off auto-kerning.
1 parent 9d35620 commit 4f80058

File tree

8 files changed

+253
-180
lines changed

8 files changed

+253
-180
lines changed

lib/mudbrick.ex

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ defmodule Mudbrick do
22
@moduledoc """
33
API for creating and exporting PDF documents.
44
5-
## Example
5+
## General example
66
77
Compression, OTF font with special characters, JPEG and line drawing:
88
@@ -42,6 +42,25 @@ defmodule Mudbrick do
4242
Produces [this](examples/compression_font_special_chars.pdf).
4343
4444
<object width="400" height="215" data="examples/compression_font_special_chars.pdf?#navpanes=0" type="application/pdf"></object>
45+
46+
## Auto-kerning
47+
48+
iex> import Mudbrick.TestHelper
49+
...> import Mudbrick
50+
...> new(fonts: %{bodoni: bodoni_bold()})
51+
...> |> page(size: {600, 200})
52+
...> |> text(
53+
...> [{"Warning\\n", underline: [width: 0.5]}, "MORE ", {"efficiency", underline: [width: 0.5]}],
54+
...> font: :bodoni,
55+
...> font_size: 70,
56+
...> position: {7, 130}
57+
...> )
58+
...> |> render()
59+
...> |> then(&File.write("examples/auto_kerning.pdf", &1))
60+
61+
Produces [this](examples/auto_kerning.pdf). Notice how the 'a' is underneath the 'W' in 'Warning'.
62+
63+
<object width="400" height="215" data="examples/auto_kerning.pdf?#navpanes=0" type="application/pdf"></object>
4564
"""
4665

4766
alias Mudbrick.{

lib/mudbrick/content_stream/tj_apostrophe.ex

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,35 @@ defmodule Mudbrick.ContentStream.Tj do
55
text: nil
66
end
77

8+
defmodule Mudbrick.ContentStream.TJ do
9+
@moduledoc false
10+
defstruct font: nil,
11+
font_size: nil,
12+
operator: "TJ",
13+
text: nil
14+
15+
defimpl Mudbrick.Object do
16+
def from(%Mudbrick.ContentStream.TJ{text: ""}) do
17+
[]
18+
end
19+
20+
def from(op) do
21+
[
22+
"[ ",
23+
Mudbrick.Font.kerned(op.font, op.text)
24+
|> Enum.map(fn
25+
{glyph_id, kerning} ->
26+
["<", glyph_id, "> ", to_string(kerning), " "]
27+
28+
glyph_id ->
29+
["<", glyph_id, "> "]
30+
end),
31+
"] TJ"
32+
]
33+
end
34+
end
35+
end
36+
837
defmodule Mudbrick.ContentStream.Apostrophe do
938
@moduledoc false
1039
defstruct font: nil,

lib/mudbrick/font.ex

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -87,17 +87,30 @@ defmodule Mudbrick.Font do
8787
end
8888

8989
def width(font, size, text) do
90-
{glyph_ids, _positions} = OpenType.layout_text(font.parsed, text)
90+
{_glyph_ids, positions} = OpenType.layout_text(font.parsed, text)
9191

92-
for id <- glyph_ids, reduce: 0 do
93-
acc ->
94-
glyph_width = Enum.at(font.parsed.glyphWidths, id)
95-
width_in_points = glyph_width / 1000 * size
96-
97-
acc + width_in_points
92+
for {_, _, _, width, _} <- positions, reduce: 0 do
93+
acc -> acc + width / 1000 * size
9894
end
9995
end
10096

97+
def kerned(font, text) do
98+
{glyph_ids_decimal, positions} =
99+
OpenType.layout_text(font.parsed, text)
100+
101+
glyph_ids_decimal
102+
|> Enum.zip(positions)
103+
|> Enum.map(fn
104+
{glyph_id, {:kern, _, _, width_when_kerned, _}} ->
105+
normal_width = Enum.at(font.parsed.glyphWidths, glyph_id)
106+
offset = normal_width - width_when_kerned
107+
{Mudbrick.to_hex(glyph_id), offset}
108+
109+
{glyph_id, {:std_width, _, _, _width, _}} ->
110+
Mudbrick.to_hex(glyph_id)
111+
end)
112+
end
113+
101114
defp add_font_file(doc, contents) do
102115
doc
103116
|> Document.add(

lib/mudbrick/predicates.ex

Lines changed: 67 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ defmodule Mudbrick.Predicates do
33
Useful for testing PDF documents.
44
55
While these predicates do check the PDF in a black-box way, it's not expected
6-
that they will work on all PDFs found in the wild.
6+
that they will work on PDFs not generated with Mudbrick.
77
"""
88

99
@doc """
@@ -22,40 +22,96 @@ defmodule Mudbrick.Predicates do
2222
Checks for presence of `text` in the `pdf` `iodata`. Searches compressed and uncompressed data.
2323
2424
This arity requires you to pass the raw font data in which the text is
25-
expected to be written. It must be present in raw hexadecimal form
26-
corresponding to the font's glyph IDs.
25+
expected to be written. The text must be present in TJ operator format, which is
26+
raw hexadecimal form corresponding to the font's glyph IDs, interspersed with
27+
optional kerning offsets.
2728
2829
The [OpenType](https://hexdocs.pm/opentype) library is used to find font
2930
features, such as ligatures, which are expected to have been used in the PDF.
3031
3132
## Options
3233
33-
- `:in_font` - raw font data in which the text is expected.
34+
- `:in_font` - raw font data in which the text is expected. Required.
3435
35-
## Example
36+
## Example: with compression
3637
37-
iex> Mudbrick.Predicates.has_text?("some-pdf", "hello", in_font: Mudbrick.TestHelper.bodoni_regular())
38+
iex> import Mudbrick.TestHelper
39+
...> import Mudbrick.Predicates
40+
...> import Mudbrick
41+
...> font = bodoni_regular()
42+
...> raw_pdf =
43+
...> new(compress: true, fonts: %{default: font})
44+
...> |> page()
45+
...> |> text(
46+
...> "WWWWWWWWWWWWWWWWWWWWWWWWWWWWWhello, CO₂!WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW",
47+
...> font_size: 100
48+
...> )
49+
...> |> render()
50+
...> |> IO.iodata_to_binary()
51+
...> {has_text?(raw_pdf, "hello, CO₂!", in_font: font), has_text?(raw_pdf, "good morning!", in_font: font)}
52+
{true, false}
53+
54+
## Example: without compression
55+
56+
iex> import Mudbrick.TestHelper
57+
...> import Mudbrick.Predicates
58+
...> import Mudbrick
59+
...> font = bodoni_regular()
60+
...> raw_pdf =
61+
...> new(compress: false, fonts: %{default: font})
62+
...> |> page()
63+
...> |> text(
64+
...> "Hello, world!",
65+
...> font_size: 100
66+
...> )
67+
...> |> render()
68+
...> |> IO.iodata_to_binary()
69+
...> {has_text?(raw_pdf, "Hello, world!", in_font: font), has_text?(raw_pdf, "Good morning!", in_font: font)}
70+
{true, false}
3871
"""
3972
@spec has_text?(pdf :: iodata(), text :: binary(), opts :: list()) :: boolean()
4073
def has_text?(pdf, text, opts) do
4174
font = Keyword.fetch!(opts, :in_font)
4275
parsed_font = OpenType.new() |> OpenType.parse(font)
43-
{glyph_ids_decimal, _positions} = OpenType.layout_text(parsed_font, text)
44-
glyph_ids_hex = Enum.map_join(glyph_ids_decimal, "", &Mudbrick.to_hex/1)
4576

46-
has_text?(pdf, glyph_ids_hex)
77+
mudbrick_font = %Mudbrick.Font{
78+
name: nil,
79+
resource_identifier: nil,
80+
type: nil,
81+
parsed: parsed_font
82+
}
83+
84+
pattern_source =
85+
Mudbrick.Font.kerned(mudbrick_font, text)
86+
|> Enum.reduce("", &append_glyph_id/2)
87+
88+
pattern = Regex.compile!(pattern_source)
89+
90+
pdf
91+
|> extract_streams()
92+
|> Enum.any?(&(&1 =~ pattern))
4793
end
4894

4995
defp extract_streams(pdf) do
96+
binary = IO.iodata_to_binary(pdf)
97+
5098
~r"<<(.*?)>>\nstream\n(.*?)endstream"s
51-
|> Regex.scan(pdf, capture: :all_but_first)
99+
|> Regex.scan(binary, capture: :all_but_first)
52100
|> Enum.map(fn
53101
[dictionary, content] ->
54102
if String.contains?(dictionary, "FlateDecode") do
55-
Mudbrick.decompress(content) |> IO.iodata_to_binary()
103+
content |> Mudbrick.decompress() |> IO.iodata_to_binary()
56104
else
57105
content
58106
end
59107
end)
60108
end
109+
110+
defp append_glyph_id({glyph_id, _kerning}, acc) do
111+
append_glyph_id(glyph_id, acc)
112+
end
113+
114+
defp append_glyph_id(glyph_id, acc) do
115+
"#{acc}<#{glyph_id}>[ \\d]+"
116+
end
61117
end

lib/mudbrick/text_block/output.ex

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ defmodule Mudbrick.TextBlock.Output do
1111
alias Mudbrick.ContentStream.Rg
1212
alias Mudbrick.ContentStream.Td
1313
alias Mudbrick.ContentStream.Tf
14-
alias Mudbrick.ContentStream.{Tj, TStar}
14+
alias Mudbrick.ContentStream.{TJ, TStar}
1515
alias Mudbrick.ContentStream.TL
1616
alias Mudbrick.Path
1717
alias Mudbrick.TextBlock.Line
@@ -30,15 +30,15 @@ defmodule Mudbrick.TextBlock.Output do
3030
output
3131
|> leading(line)
3232
|> reset_offset(x_offsetter.(line))
33-
|> reduce_parts(line, Tj, :first_line, x_offsetter)
33+
|> reduce_parts(line, TJ, :first_line, x_offsetter)
3434
|> offset(x_offsetter.(line))
3535
end
3636

3737
def reduce_lines(output, [line | lines], x_offsetter) do
3838
output
3939
|> leading(line)
4040
|> reset_offset(x_offsetter.(line))
41-
|> reduce_parts(line, Tj, nil, x_offsetter)
41+
|> reduce_parts(line, TJ, nil, x_offsetter)
4242
|> offset(x_offsetter.(line))
4343
|> reduce_lines(lines, x_offsetter)
4444
end
@@ -57,19 +57,19 @@ defmodule Mudbrick.TextBlock.Output do
5757

5858
defp reduce_parts(output, %Line{parts: [part]} = line, _operator, :first_line, x_offsetter) do
5959
output
60-
|> Output.add_part(part, Tj)
60+
|> Output.add_part(part, TJ)
6161
|> underline(part, x_offsetter.(line))
6262
end
6363

6464
defp reduce_parts(output, %Line{parts: []}, _operator, nil, _x_offsetter) do
6565
output
66-
|> Output.add(%Tj{font: output.font, text: ""})
66+
|> Output.add(%TJ{font: output.font, text: ""})
6767
|> Output.add(%TStar{})
6868
end
6969

7070
defp reduce_parts(output, %Line{parts: [part]} = line, _operator, nil, x_offsetter) do
7171
output
72-
|> Output.add_part(part, Tj)
72+
|> Output.add_part(part, TJ)
7373
|> Output.add(%TStar{})
7474
|> underline(part, x_offsetter.(line))
7575
end
@@ -84,7 +84,7 @@ defmodule Mudbrick.TextBlock.Output do
8484
output
8585
|> Output.add_part(part, operator)
8686
|> underline(part, x_offsetter.(line))
87-
|> reduce_parts(%{line | parts: parts}, Tj, line_kind, x_offsetter)
87+
|> reduce_parts(%{line | parts: parts}, TJ, line_kind, x_offsetter)
8888
end
8989

9090
defp underline(output, %Line.Part{underline: nil}, _line_x_offset), do: output
@@ -100,7 +100,7 @@ defmodule Mudbrick.TextBlock.Output do
100100
{offset_x, offset_y} = part.left_offset
101101

102102
x = x + offset_x - line_x_offset
103-
y = y + offset_y - 2
103+
y = y + offset_y - part.font_size / 10
104104

105105
Path.new()
106106
|> Path.move(to: {x, y})
@@ -150,7 +150,7 @@ defmodule Mudbrick.TextBlock.Output do
150150
def add_part(output, part, operator) do
151151
output
152152
|> with_font(
153-
struct!(operator, font: part.font, text: part.text),
153+
struct!(operator, font: part.font, font_size: part.font_size, text: part.text),
154154
part
155155
)
156156
|> colour(part.colour)

0 commit comments

Comments
 (0)