 # by allowing for automatic mentions using the double-bracket link syntax.
 class BidirectionalLinksGenerator < Jekyll::Generator
   def generate(site)
-
     # This is only supported for English
     lang = "en"
     all_pages = site.documents.select { |doc| doc.url.start_with?("/#{lang}/") }
@@ -82,6 +81,12 @@ def generate(site)
         HTML
       )
     end
+
+    # we need the topic links for manual substitutions of links later on
+    @topics_links = site.collections["topics"].map do |topic|
+      ["topic #{topic.data["shortname"] || topic.data["title"]}", topic.url]
+    end
+
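For illustration, each element of @topics_links pairs a "topic <name>" key with that topic's URL. With made-up topic data (assuming one topic with shortname "cpfp" and another titled "Taproot" with no shortname), the array would look roughly like:

    # hypothetical contents of @topics_links
    [
      ["topic cpfp", "/en/topics/cpfp/"],
      ["topic Taproot", "/en/topics/taproot/"]
    ]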
     # Newsletter mentions
     # =====================
     newsletter_pages = pages_with_link_syntax.select { |doc| doc.url.start_with?("/#{lang}/newsletters/") }
@@ -95,21 +100,43 @@ def generate(site)
         if page_in_question.content.include?(target_page_href)
           # The page_in_question mentions the current page, we now need to
           # find the specific mentions.
-          mentions = get_mentions_of(page_in_question, target_page_href)
+          mentions = get_mentions_of(page_in_question, target_page_href, current_page.collection.label)
           current_page.data["optech_mentions"] ||= [] # Initialize if not already present
           # Add the calculated mentions to `optech_mentions`
           # Note: a page might mention another page more than once
           mentions.each do |mention|
-            current_page.data["optech_mentions"] << {
-              "title" => mention["title"],
-              "url" => mention["url"]
-            }
+            current_page.data["optech_mentions"] << mention
           end
         end
       end
     end
   end
 
+  def liquify(content, date)
+    context = Liquid::Context.new({}, {}, { site: Jekyll.sites.first })
+    context['page'] = { 'date' => date } # needed to identify deprecated_links
+    template = Liquid::Template.parse(content)
+    template.render(context)
+  end
+
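A rough usage sketch (hypothetical call; it assumes a Jekyll site has already been instantiated so that Jekyll.sites.first is populated): liquify just runs a string through Liquid with the site placed in the render registers, which is what lets {% include %} tags resolve.

    # expands the include roughly the way Jekyll itself would
    rendered = liquify("{% include references.md %}", Date.today)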
+  def get_external_links(page)
+    # this assumes that a "{% include {references, linkers/issues}.md %}" line
+    # exists at the end of the documents and external links are declared after it
+
+    # get all the references after the {% include _ %} line
+    regex_for_first_include = /\{% include (?:references\.md|linkers\/issues\.md).*?%\}/
+    references = page.content.split(regex_for_first_include, 2).last.strip
+    references.prepend("{% include references.md %}\n")
+
+    # manually trigger the replacement of the {% include %} tags in order to
+    # have all the required links ([key]:url) needed for the matching snippets
+    references_parsed = liquify(references, page.date)
+
+    # Search for all occurrences of the pattern "[key]: url"
+    # and return them in an array
+    references_parsed.scan(/\[([^\]]+?)\]\s*:\s*(\S+)/i)
+  end
+
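Because scan is called with two capture groups, the return value is an array of [key, url] pairs. A sketch with made-up reference entries:

    # if the rendered references contain:
    #   [bolt12]: https://example.com/bolt12
    #   [eclair]: https://example.com/eclair
    # then get_external_links(page) returns:
    #   [["bolt12", "https://example.com/bolt12"],
    #    ["eclair", "https://example.com/eclair"]]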
   def find_title(string)
     title = capture_group = ""
     ## Find shortest match for **bold**, *italics*, or [markdown][links]
@@ -157,10 +184,10 @@ def extract_slug_from_manual_anchor(text)
     # - remove liquid anchor syntax from the result
     # - extract slug to use it on the generated anchor list link
     # an example of this pattern can be seen in `en/newsletter/2019-06-12-newsletter.md`
-    match = text.match(/\{:#(\w+)\}/)
+    match = text.match(/\{:#([\w-]+)\}/)
     if match
       slug = "##{match[1]}" # extract slug
-      text.sub!(/\{:#\w+\}\n?/, "") # Remove the {:#slug} syntax and optional trailing newline
+      text.sub!(/#{match[0]}/, "") # Remove the matched {:#slug} syntax
       slug
     else
       nil
@@ -169,21 +196,30 @@ def extract_slug_from_manual_anchor(text)
 
   # This method searches the content for paragraphs that link to
   # the target page and returns these mentions
-  def get_mentions_of(page, target_page_url)
+  def get_mentions_of(page, target_page_url, collection)
     # This is called only when we know that a match exists
     # The logic here assumes that:
+    # - paragraphs have headers
     # - each block of text (paragraph) is separated by an empty line
     # - primary titles are enclosed in **bold**
     # - secondary (nested) titles are enclosed in *italics*
 
     content = page.content
+    external_links = collection == "people" ?
+      get_external_links(page).reverse + @topics_links : [] # people-index specific
+
     # Split the content into paragraphs
     paragraphs = content.split(/\n\n+/)
+    # Find all the headers in the content
+    headers = content.scan(/^#+\s+(.*)$/).flatten
 
     # Create an array of hashes containing:
+    # - the paragraph text
+    # - the associated header
     # - the associated url
-    # - the associated title
+    # - the associated title (when it is not part of the paragraph)
     matching_paragraphs = []
+    current_header = 0
     current_title = []
 
     # Iterate over all paragraphs to find those that match the given url
@@ -215,6 +251,19 @@ def get_mentions_of(page, target_page_url)
 
       # If the current paragraph contains the URL, add it to the matching paragraphs
       if p.include?(target_page_url)
+        if collection == "people"
+          # Loop through the array of [key]:url_replace matches and replace
+          # - the occurrences of "[key][]" with "[key](url_replace)"
+          # - the occurrences of "[something][key]" with "[something](url_replace)"
+          external_links.each do |match|
+            key_pattern = match[0].gsub(/\s/, '\s+') # to work with multiline keys
+            p.gsub!(/\[(#{key_pattern})\]\[\]/im, "[\\1](#{match[1]})")
+            p.gsub!(/\[(.+?)\]\[(#{key_pattern})\]/im, "[\\1](#{match[1]})")
+          end
+          # manually replace common liquid variables in paragraph
+          p.gsub!(/#{Regexp.escape("{{bse}}")}/, "https://bitcoin.stackexchange.com/a/")
+        end
+
         # generate slug for matching paragraph
         slug = extract_slug_from_manual_anchor(p)
         if slug.nil?
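To make the substitution above concrete, here is a sketch with a single made-up entry ["bolt12", "https://example.com/bolt12"] in external_links:

    p = "see [bolt12][] and [the spec][bolt12]"
    # after the two gsub! calls:
    # "see [bolt12](https://example.com/bolt12) and [the spec](https://example.com/bolt12)"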
@@ -230,9 +279,33 @@ def get_mentions_of(page, target_page_url)
230279 "title" => current_title . join ( ": " ) ,
231280 "url" => "#{ page . url } #{ slug } "
232281 }
282+ if collection == "people"
283+ # People index has verbosed mentions
284+ matching_paragraph . merge! ( {
285+ "paragraph" => p . lstrip ,
286+ "header" => headers [ current_header ] ,
287+ "newsletter_number" => page . title . sub ( "Bitcoin Optech Newsletter #" , "" ) . to_i ,
288+ "year" => File . basename ( page . path ) [ 0 , 4 ]
289+ } )
290+
291+ if !title . empty?
292+ # paragraph has title
293+ # for the verbosed mentions we display the paragraph that contains
294+ # the mention (see `optech-mentions.html`), therefore we do not
295+ # need to repeat the title
296+ current_title . pop # this way we keep the parent title
297+ matching_paragraph [ "title" ] = current_title [ 0 ]
298+ end
299+ end
233300 matching_paragraphs << matching_paragraph
234301 end
302+
303+ # update to the next header when parse through it
304+ if p . sub ( /^#+\s */ , "" ) == headers [ ( current_header + 1 ) % headers . length ( ) ]
305+ current_header += 1
306+ end
235307 end
308+
236309 # Return the matching paragraphs
237310 matching_paragraphs
238311 end
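Putting it together, a mention collected from a newsletter for a people page ends up as a hash shaped roughly like this (all values made up):

    {
      "title" => "Parent section title",
      "url" => "/en/newsletters/2021/06/02/#made-up-slug",
      "paragraph" => "The paragraph text that contains the mention...",
      "header" => "Notable code and documentation changes",
      "newsletter_number" => 150,
      "year" => "2021"
    }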