 # by allowing for automatic mentions using the double-bracket link syntax.
 class BidirectionalLinksGenerator < Jekyll::Generator
   def generate(site)
-
     # This is only supported for English
     lang = "en"
     all_pages = site.documents.select { |doc| doc.url.start_with?("/#{lang}/") }
@@ -82,6 +81,12 @@ def generate(site)
         HTML
       )
     end
+
+    # we need the topic links for manual substitutions of links later on
+    @topics_links = site.collections["topics"].map do |topic|
+      ["topic #{topic.data["shortname"] || topic.data["title"]}", topic.url]
+    end
+
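For illustration, each element of @topics_links pairs a "topic <name>" key with that topic's URL. With made-up topic data (assuming one topic with shortname "cpfp" and another titled "Taproot" with no shortname), the array would look roughly like:

    # hypothetical contents of @topics_links
    [
      ["topic cpfp", "/en/topics/cpfp/"],
      ["topic Taproot", "/en/topics/taproot/"]
    ]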
     # Newsletter mentions
     # =====================
     newsletter_pages = pages_with_link_syntax.select { |doc| doc.url.start_with?("/#{lang}/newsletters/") }
@@ -95,21 +100,43 @@ def generate(site)
         if page_in_question.content.include?(target_page_href)
           # The page_in_question mentions the current page, we now need to
           # find the specific mentions.
-          mentions = get_mentions_of(page_in_question, target_page_href)
+          mentions = get_mentions_of(page_in_question, target_page_href, current_page.collection.label)
           current_page.data["optech_mentions"] ||= [] # Initialize if not already present
           # Add the calculated mentions to `optech_mentions`
           # Note: a page might mention another page more than once
           mentions.each do |mention|
-            current_page.data["optech_mentions"] << {
-              "title" => mention["title"],
-              "url" => mention["url"]
-            }
+            current_page.data["optech_mentions"] << mention
           end
         end
       end
     end
   end
 
+  def liquify(content, date)
+    context = Liquid::Context.new({}, {}, { site: Jekyll.sites.first })
+    context['page'] = { 'date' => date } # needed to identify deprecated_links
+    template = Liquid::Template.parse(content)
+    template.render(context)
+  end
+
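A rough usage sketch (hypothetical call; it assumes a Jekyll site has already been instantiated so that Jekyll.sites.first is populated): liquify just runs a string through Liquid with the site placed in the render registers, which is what lets {% include %} tags resolve.

    # expands the include roughly the way Jekyll itself would
    rendered = liquify("{% include references.md %}", Date.today)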
+  def get_external_links(page)
+    # this assumes that a "{% include {references, linkers/issues}.md %}" line
+    # exists at the end of the documents and external links are declared after it
+
+    # get all the references after the {% include _ %} line
+    regex_for_first_include = /\{% include (?:references\.md|linkers\/issues\.md).*?%\}/
+    references = page.content.split(regex_for_first_include, 2).last.strip
+    references.prepend("{% include references.md %}\n")
+
+    # manually trigger the replacement of the {% include %} tags in order to
+    # have all the required links ([key]:url) needed for the matching snippets
+    references_parsed = liquify(references, page.date)
+
+    # Search for all occurrences of the pattern "[key]: url"
+    # and return them in an array
+    references_parsed.scan(/\[([^\]]+?)\]\s*:\s*(\S+)/i)
+  end
+
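Because scan is called with two capture groups, the return value is an array of [key, url] pairs. A sketch with made-up reference entries:

    # if the rendered references contain:
    #   [bolt12]: https://example.com/bolt12
    #   [eclair]: https://example.com/eclair
    # then get_external_links(page) returns:
    #   [["bolt12", "https://example.com/bolt12"],
    #    ["eclair", "https://example.com/eclair"]]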
   def find_title(string)
     title = capture_group = ""
     ## Find shortest match for **bold**, *italics*, or [markdown][links]
@@ -157,10 +184,10 @@ def extract_slug_from_manual_anchor(text)
     # - remove liquid anchor syntax from the result
     # - extract slug to use it on the generated anchor list link
     # an example of this pattern can be seen in `en/newsletter/2019-06-12-newsletter.md`
-    match = text.match(/\{:#(\w+)\}/)
+    match = text.match(/\{:#([\w-]+)\}/)
     if match
       slug = "##{match[1]}" # extract slug
-      text.sub!(/\{:#\w+\}\n?/, "") # Remove the {:#slug} syntax and optional trailing newline
+      text.sub!(/#{match[0]}/, "") # Remove the matched {:#slug} syntax
       slug
     else
       nil
@@ -169,21 +196,30 @@ def extract_slug_from_manual_anchor(text)
 
   # This method searches the content for paragraphs that link to
   # the target page and returns these mentions
-  def get_mentions_of(page, target_page_url)
+  def get_mentions_of(page, target_page_url, collection)
     # This is called only when we know that a match exists
     # The logic here assumes that:
+    # - paragraphs have headers
     # - each block of text (paragraph) is separated by an empty line
     # - primary titles are enclosed in **bold**
     # - secondary (nested) titles are enclosed in *italics*
 
     content = page.content
+    external_links = collection == "people" ?
+      get_external_links(page).reverse + @topics_links : [] # people-index specific
+
     # Split the content into paragraphs
     paragraphs = content.split(/\n\n+/)
+    # Find all the headers in the content
+    headers = content.scan(/^#+\s+(.*)$/).flatten
 
     # Create an array of hashes containing:
+    # - the paragraph text
+    # - the associated header
     # - the associated url
-    # - the associated title
+    # - the associated title (when it is not part of the paragraph)
     matching_paragraphs = []
+    current_header = 0
     current_title = []
 
     # Iterate over all paragraphs to find those that match the given url
@@ -215,6 +251,19 @@ def get_mentions_of(page, target_page_url)
 
       # If the current paragraph contains the URL, add it to the matching paragraphs
       if p.include?(target_page_url)
+        if collection == "people"
+          # Loop through the array of [key]:url_replace matches and replace
+          # - the occurrences of "[key][]" with "[key](url_replace)"
+          # - the occurrences of "[something][key]" with "[something](url_replace)"
+          external_links.each do |match|
+            key_pattern = match[0].gsub(/\s/, '\s+') # to work with multiline keys
+            p.gsub!(/\[(#{key_pattern})\]\[\]/im, "[\\1](#{match[1]})")
+            p.gsub!(/\[(.+?)\]\[(#{key_pattern})\]/im, "[\\1](#{match[1]})")
+          end
+          # manually replace common liquid variables in paragraph
+          p.gsub!(/#{Regexp.escape("{{bse}}")}/, "https://bitcoin.stackexchange.com/a/")
+        end
+
         # generate slug for matching paragraph
         slug = extract_slug_from_manual_anchor(p)
         if slug.nil?
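To make the substitution above concrete, here is a sketch with a single made-up entry ["bolt12", "https://example.com/bolt12"] in external_links:

    p = "see [bolt12][] and [the spec][bolt12]"
    # after the two gsub! calls:
    # "see [bolt12](https://example.com/bolt12) and [the spec](https://example.com/bolt12)"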
@@ -230,9 +279,33 @@ def get_mentions_of(page, target_page_url)
230279 "title" => current_title . join ( ": " ) ,
231280 "url" => "#{ page . url } #{ slug } "
232281 }
282+ if collection == "people"
283+ # People index has verbosed mentions
284+ matching_paragraph . merge! ( {
285+ "paragraph" => p . lstrip ,
286+ "header" => headers [ current_header ] ,
287+ "newsletter_number" => page . title . sub ( "Bitcoin Optech Newsletter #" , "" ) . to_i ,
288+ "year" => File . basename ( page . path ) [ 0 , 4 ]
289+ } )
290+
291+ if !title . empty?
292+ # paragraph has title
293+ # for the verbosed mentions we display the paragraph that contains
294+ # the mention (see `optech-mentions.html`), therefore we do not
295+ # need to repeat the title
296+ current_title . pop # this way we keep the parent title
297+ matching_paragraph [ "title" ] = current_title [ 0 ]
298+ end
299+ end
233300 matching_paragraphs << matching_paragraph
234301 end
302+
303+ # update to the next header when parse through it
304+ if p . sub ( /^#+\s */ , "" ) == headers [ ( current_header + 1 ) % headers . length ( ) ]
305+ current_header += 1
306+ end
235307 end
308+
236309 # Return the matching paragraphs
237310 matching_paragraphs
238311 end
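Putting it together, a mention collected from a newsletter for a people page ends up as a hash shaped roughly like this (all values made up):

    {
      "title" => "Parent section title",
      "url" => "/en/newsletters/2021/06/02/#made-up-slug",
      "paragraph" => "The paragraph text that contains the mention...",
      "header" => "Notable code and documentation changes",
      "newsletter_number" => 150,
      "year" => "2021"
    }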