 # by allowing for automatic mentions using the double-bracket link syntax.
 class BidirectionalLinksGenerator < Jekyll::Generator
   def generate(site)
-
     # This is only supported for English
     lang = "en"
     all_pages = site.documents.select { |doc| doc.url.start_with?("/#{lang}/") }
@@ -82,6 +81,12 @@ def generate(site)
       HTML
       )
     end
+
+    # we need the topic links for manual substitutions of links later on
+    @topics_links = site.collections["topics"].map do |topic|
+      ["topic #{topic.data["shortname"] || topic.data["title"]}", topic.url]
+    end
+
     # Newsletter mentions
     # =====================
     newsletter_pages = pages_with_link_syntax.select { |doc| doc.url.start_with?("/#{lang}/newsletters/") }
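Note: each `@topics_links` entry pairs a matchable key with the topic's URL. A minimal sketch of the resulting structure (the shortnames and URLs here are hypothetical):

    topics_links = [
      ["topic anyprevout", "/en/topics/sighash_anyprevout/"], # hypothetical
      ["topic vaults", "/en/topics/vaults/"]                  # hypothetical
    ]
    # These keys are later matched against reference-style links such as
    # "[topic anyprevout][]" inside mention paragraphs.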
@@ -95,21 +100,43 @@ def generate(site)
         if page_in_question.content.include?(target_page_href)
           # The page_in_question mentions the current page, we now need to
           # find the specific mentions.
-          mentions = get_mentions_of(page_in_question, target_page_href)
+          mentions = get_mentions_of(page_in_question, target_page_href, current_page.collection.label)
           current_page.data["optech_mentions"] ||= [] # Initialize if not already present
           # Add the calculated mentions to `optech_mentions`
           # Note: a page might mention another page more than once
           mentions.each do |mention|
-            current_page.data["optech_mentions"] << {
-              "title" => mention["title"],
-              "url" => mention["url"]
-            }
+            current_page.data["optech_mentions"] << mention
           end
         end
       end
     end
   end

+  def liquify(content, date)
+    context = Liquid::Context.new({}, {}, { site: Jekyll.sites.first })
+    context['page'] = { 'date' => date } # needed to identify deprecated_links
+    template = Liquid::Template.parse(content)
+    content_parsed = template.render(context)
+  end
+
+  def get_external_links(page)
+    # this assumes that a "{% include {references, linkers/issues}.md %}" line
+    # exists at the end of the documents and external links are declared after it
+
+    # get all the references after the {% include _ %} line
+    regex_for_first_include = /\{% include (?:references\.md|linkers\/issues\.md).*?%\}/
+    references = page.content.split(regex_for_first_include, 2).last.strip
+    references.prepend("{% include references.md %}\n")
+
+    # manually trigger the replacement of the {% include %} tags in order to
+    # have all the required links ([key]:url) needed for the matching snippets
+    references_parsed = liquify(references, page.date)
+
+    # Search for all occurrences of the pattern "[key]: url"
+    # and return them in an array
+    references_parsed.scan(/\[([^\]]+?)\]\s*:\s*(\S+)/i)
+  end
+
   def find_title(string)
     title = capture_group = ""
     ## Find shortest match for **bold**, *italics*, or [markdown][links]
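For context, `get_external_links` renders the trailing references block through Liquid and then harvests every "[key]: url" definition with the final `scan`. A sketch of that step on hypothetical rendered output:

    references_parsed = "[bip 119]: https://github.com/bitcoin/bips/blob/master/bip-0119.mediawiki\n" \
                        "[newsletter #200]: /en/newsletters/2022/05/11/\n" # sample definitions, illustrative only
    references_parsed.scan(/\[([^\]]+?)\]\s*:\s*(\S+)/i)
    # => [["bip 119", "https://github.com/bitcoin/bips/blob/master/bip-0119.mediawiki"],
    #     ["newsletter #200", "/en/newsletters/2022/05/11/"]]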
@@ -157,10 +184,10 @@ def extract_slug_from_manual_anchor(text)
     # - remove liquid anchor syntax from the result
     # - extract slug to use it on the generated anchor list link
     # example of this pattern can be seen in `en/newsletter/2019-06-12-newsletter.md`
-    match = text.match(/\{:#(\w+)\}/)
+    match = text.match(/\{:#([\w-]+)\}/)
     if match
       slug = "##{match[1]}" # extract slug
-      text.sub!(/\{:#\w+\}\n?/, "") # Remove the {:#slug} syntax and optional trailing newline
+      text.sub!(/#{match[0]}/, "") # Remove the matched {:#slug} syntax
       slug
     else
       nil
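The widened character class matters because `\w` excludes hyphens, so a hyphenated anchor such as `{:#fee-estimation}` did not match the old pattern at all; the replacement `sub!` now also removes exactly the anchor that was matched. A sketch with a hypothetical anchor:

    text = "Discussion of fees {:#fee-estimation}\n"
    match = text.match(/\{:#([\w-]+)\}/)
    slug = "##{match[1]}"        # => "#fee-estimation"
    text.sub!(/#{match[0]}/, "") # removes only "{:#fee-estimation}"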
@@ -169,21 +196,30 @@ def extract_slug_from_manual_anchor(text)

   # This method searches the content for paragraphs that link to
   # the target page and returns these mentions
-  def get_mentions_of(page, target_page_url)
+  def get_mentions_of(page, target_page_url, collection)
     # This is called only when we know that a match exists
     # The logic here assumes that:
+    # - paragraphs have headers
     # - each block of text (paragraph) is separated by an empty line
     # - primary titles are enclosed in **bold**
     # - secondary (nested) titles are enclosed in *italics*

     content = page.content
+    external_links = collection == "people" ?
+      get_external_links(page).reverse + @topics_links : [] # people-index specific
+
     # Split the content into paragraphs
     paragraphs = content.split(/\n\n+/)
+    # Find all the headers in the content
+    headers = content.scan(/^#+\s+(.*)$/).flatten

     # Create an array of hashes containing:
+    # - the paragraph text
+    # - the associated header
     # - the associated url
-    # - the associated title
+    # - the associated title (when it is not part of the paragraph)
     matching_paragraphs = []
+    current_header = 0
     current_title = []

     # Iterate over all paragraphs to find those that match the given url
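A sketch of the bookkeeping set up above, run on hypothetical newsletter content:

    content = "## Releases\n\nSome release text.\n\n## Notable code\n\nMore text.\n"
    content.split(/\n\n+/)
    # => ["## Releases", "Some release text.", "## Notable code", "More text.\n"]
    content.scan(/^#+\s+(.*)$/).flatten
    # => ["Releases", "Notable code"]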
@@ -215,6 +251,19 @@ def get_mentions_of(page, target_page_url)

       # If the current paragraph contains the URL, add it to the matching paragraphs
       if p.include?(target_page_url)
+        if collection == "people"
+          # Loop through the array of [key]:url_replace matches and replace
+          # - the occurrences of "[key][]" with "[key](url_replace)"
+          # - the occurrences of "[something][key]" with "[something](url_replace)"
+          external_links.each do |match|
+            key_pattern = match[0].gsub(/\s/, '\s+') # to work with multiline keys
+            p.gsub!(/\[(#{key_pattern})\]\[\]/im, "[\\1](#{match[1]})")
+            p.gsub!(/\[(.+?)\]\[(#{key_pattern})\]/im, "[\\1](#{match[1]})")
+          end
+          # manually replace common liquid variables in paragraph
+          p.gsub!(/#{Regexp.escape("{{bse}}")}/, "https://bitcoin.stackexchange.com/a/")
+        end
+
         # generate slug for matching paragraph
         slug = extract_slug_from_manual_anchor(p)
         if slug.nil?
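A sketch of the substitution the people-index branch performs, using a hypothetical key/URL pair:

    p = "See [topic anyprevout][] and the [proposal][topic anyprevout]."
    key, url = "topic anyprevout", "/en/topics/sighash_anyprevout/"
    key_pattern = key.gsub(/\s/, '\s+') # tolerate keys wrapped across lines
    p.gsub!(/\[(#{key_pattern})\]\[\]/im, "[\\1](#{url})")
    p.gsub!(/\[(.+?)\]\[(#{key_pattern})\]/im, "[\\1](#{url})")
    # => "See [topic anyprevout](/en/topics/sighash_anyprevout/)
    #     and the [proposal](/en/topics/sighash_anyprevout/)."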
@@ -230,9 +279,33 @@ def get_mentions_of(page, target_page_url)
           "title" => current_title.join(": "),
           "url" => "#{page.url}#{slug}"
         }
+        if collection == "people"
+          # People index has verbose mentions
+          matching_paragraph.merge!({
+            "paragraph" => p.lstrip,
+            "header" => headers[current_header],
+            "newsletter_number" => page.title.sub("Bitcoin Optech Newsletter #", "").to_i,
+            "year" => File.basename(page.path)[0, 4]
+          })
+
+          if !title.empty?
+            # paragraph has title
+            # for the verbose mentions we display the paragraph that contains
+            # the mention (see `optech-mentions.html`), therefore we do not
+            # need to repeat the title
+            current_title.pop # this way we keep the parent title
+            matching_paragraph["title"] = current_title[0]
+          end
+        end
         matching_paragraphs << matching_paragraph
       end
+
+      # update to the next header when we parse past it
+      if p.sub(/^#+\s*/, "") == headers[(current_header + 1) % headers.length]
+        current_header += 1
+      end
     end

     # Return the matching paragraphs
     matching_paragraphs
   end
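The modulo in the header-cursor check keeps the lookup in bounds once the last header is reached. A sketch of the cursor advancing (hypothetical headers):

    headers = ["Releases", "Notable code"]
    current_header = 0
    p = "## Notable code" # the paragraph currently being processed
    if p.sub(/^#+\s*/, "") == headers[(current_header + 1) % headers.length]
      current_header += 1 # later mentions now attach to "Notable code"
    end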