From a3c8ea3b4d3416a627588198a139c66657f2d5d3 Mon Sep 17 00:00:00 2001
From: Chris Wong <lambda.fairy@gmail.com>
Date: Thu, 22 Aug 2024 12:58:41 +1000
Subject: [PATCH] Switch to use Comrak for syntax highlighting (#438)

---
 docs/Cargo.lock            | 27 ++++++++++++
 docs/Cargo.toml            |  2 +-
 docs/src/bin/build_page.rs | 54 ++---------------------
 docs/src/highlight.rs      | 44 +++++++++++++++++++
 docs/src/lib.rs            |  1 +
 docs/src/views.rs          | 90 +++++++++++++++++++++-----------------
 6 files changed, 127 insertions(+), 91 deletions(-)
 create mode 100644 docs/src/highlight.rs

diff --git a/docs/Cargo.lock b/docs/Cargo.lock
index ddff3f5..1f7fce2 100644
--- a/docs/Cargo.lock
+++ b/docs/Cargo.lock
@@ -32,6 +32,21 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "bit-set"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
+dependencies = [
+ "bit-vec",
+]
+
+[[package]]
+name = "bit-vec"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
+
 [[package]]
 name = "bitflags"
 version = "1.3.2"
@@ -82,6 +97,7 @@ dependencies = [
  "once_cell",
  "regex",
  "slug",
+ "syntect",
  "typed-arena",
  "unicode_categories",
 ]
@@ -198,6 +214,16 @@ version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
+[[package]]
+name = "fancy-regex"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2"
+dependencies = [
+ "bit-set",
+ "regex",
+]
+
 [[package]]
 name = "flate2"
 version = "1.0.32"
@@ -519,6 +545,7 @@ checksum = "874dcfa363995604333cf947ae9f751ca3af4522c60886774c4963943b4746b1"
 dependencies = [
  "bincode",
  "bitflags",
+ "fancy-regex",
  "flate2",
  "fnv",
  "once_cell",
diff --git a/docs/Cargo.toml b/docs/Cargo.toml
index 4fb463d..b4ece88 100644
--- a/docs/Cargo.toml
+++ b/docs/Cargo.toml
@@ -11,7 +11,7 @@ description = "Documentation for Maud."
 edition = "2021"
 
 [dependencies]
-comrak = { version = "*", default-features = false }
+comrak = { version = "*", default-features = false, features = ["syntect"] }
 maud = { path = "../maud" }
 serde_json = "*"
 syntect = "*"
diff --git a/docs/src/bin/build_page.rs b/docs/src/bin/build_page.rs
index b42b496..4533a39 100644
--- a/docs/src/bin/build_page.rs
+++ b/docs/src/bin/build_page.rs
@@ -1,5 +1,5 @@
 use comrak::{
-    nodes::{AstNode, NodeCodeBlock, NodeHeading, NodeHtmlBlock, NodeLink, NodeValue},
+    nodes::{AstNode, NodeCodeBlock, NodeHeading, NodeLink, NodeValue},
     Arena,
 };
 use docs::{
@@ -14,11 +14,6 @@ use std::{
     path::Path,
     str,
 };
-use syntect::{
-    highlighting::{Color, ThemeSet},
-    html::highlighted_html_for_string,
-    parsing::SyntaxSet,
-};
 
 fn main() -> Result<(), Box<dyn Error>> {
     let args = env::args().collect::<Vec<_>>();
@@ -55,7 +50,7 @@ fn build_page(
         .collect::<Vec<_>>();
 
     let page = Page::load(&arena, input_path)?;
-    postprocess(page.content)?;
+    postprocess(page.content);
 
     let markup = views::main(slug, page, &nav, version, hash);
 
@@ -65,12 +60,10 @@ fn build_page(
     Ok(())
 }
 
-fn postprocess<'a>(content: &'a AstNode<'a>) -> Result<(), Box<dyn Error>> {
+fn postprocess<'a>(content: &'a AstNode<'a>) {
     lower_headings(content);
     rewrite_md_links(content);
     strip_hidden_code(content);
-    highlight_code(content)?;
-    Ok(())
 }
 
 fn lower_headings<'a>(root: &'a AstNode<'a>) {
@@ -98,8 +91,7 @@ fn strip_hidden_code<'a>(root: &'a AstNode<'a>) {
     for node in root.descendants() {
         let mut data = node.data.borrow_mut();
         if let NodeValue::CodeBlock(NodeCodeBlock { info, literal, .. }) = &mut data.value {
-            let info = parse_code_block_info(info);
-            if !info.contains(&"rust") {
+            if info.split(',').map(str::trim).all(|lang| lang != "rust") {
                 continue;
             }
             *literal = strip_hidden_code_inner(literal);
@@ -117,41 +109,3 @@ fn strip_hidden_code_inner(literal: &str) -> String {
         .collect::<Vec<_>>();
     lines.join("\n")
 }
-
-fn highlight_code<'a>(root: &'a AstNode<'a>) -> Result<(), Box<dyn Error>> {
-    let ss = SyntaxSet::load_defaults_newlines();
-    let ts = ThemeSet::load_defaults();
-    let mut theme = ts.themes["InspiredGitHub"].clone();
-    theme.settings.background = Some(Color {
-        r: 0xff,
-        g: 0xee,
-        b: 0xff,
-        a: 0xff,
-    });
-    for node in root.descendants() {
-        let mut data = node.data.borrow_mut();
-        if let NodeValue::CodeBlock(NodeCodeBlock { info, literal, .. }) = &mut data.value {
-            let info = parse_code_block_info(info);
-            let syntax = info
-                .into_iter()
-                .filter_map(|token| ss.find_syntax_by_token(token))
-                .next()
-                .unwrap_or_else(|| ss.find_syntax_plain_text());
-            let mut literal = std::mem::take(literal);
-            if !literal.ends_with('\n') {
-                // Syntect expects a trailing newline
-                literal.push('\n');
-            }
-            let html = highlighted_html_for_string(&literal, &ss, syntax, &theme)?;
-            data.value = NodeValue::HtmlBlock(NodeHtmlBlock {
-                literal: html,
-                ..Default::default()
-            });
-        }
-    }
-    Ok(())
-}
-
-fn parse_code_block_info(info: &str) -> Vec<&str> {
-    info.split(',').map(str::trim).collect()
-}
diff --git a/docs/src/highlight.rs b/docs/src/highlight.rs
new file mode 100644
index 0000000..b297fe4
--- /dev/null
+++ b/docs/src/highlight.rs
@@ -0,0 +1,70 @@
+//! Syntax highlighting for fenced code blocks via comrak's syntect plugin.
+
+use comrak::{
+    plugins::syntect::{SyntectAdapter, SyntectAdapterBuilder},
+    Plugins,
+};
+use std::rc::Rc;
+use syntect::highlighting::{Color, ThemeSet};
+
+/// Name of the stock syntect theme that the docs theme is derived from.
+const BASE_THEME: &str = "InspiredGitHub";
+
+/// Name under which the customized copy of [`BASE_THEME`] is registered.
+///
+/// Shared by the theme-set insertion and the builder's theme selection so the
+/// two cannot drift apart.
+const DOCS_THEME: &str = "InspiredGitHub2";
+
+/// Handle to the syntect adapter cached for the current thread.
+pub struct Highlighter {
+    adapter: Rc<SyntectAdapter>,
+}
+
+impl Highlighter {
+    /// Returns a handle sharing the current thread's adapter.
+    ///
+    /// The thread-local adapter is initialized lazily on first access; every
+    /// call clones the `Rc` rather than the adapter itself.
+    pub fn get() -> Self {
+        Self {
+            adapter: SYNTECT_ADAPTER.with(Rc::clone),
+        }
+    }
+
+    /// Returns comrak plugins whose code-fence highlighter borrows this handle's adapter.
+    pub fn as_plugins(&self) -> Plugins<'_> {
+        let mut plugins = Plugins::default();
+        plugins.render.codefence_syntax_highlighter = Some(&*self.adapter);
+        plugins
+    }
+}
+
+thread_local! {
+    /// Per-thread adapter configured with the docs theme.
+    static SYNTECT_ADAPTER: Rc<SyntectAdapter> = Rc::new(
+        SyntectAdapterBuilder::new()
+            .theme_set(docs_theme_set())
+            .theme(DOCS_THEME)
+            .build(),
+    );
+}
+
+/// Builds syntect's default theme set plus [`DOCS_THEME`], a copy of
+/// [`BASE_THEME`] with its background color overridden.
+fn docs_theme_set() -> ThemeSet {
+    let mut theme_set = ThemeSet::load_defaults();
+    let mut theme = theme_set
+        .themes
+        .get(BASE_THEME)
+        .cloned()
+        .expect("syntect's default theme set should contain the base theme");
+    theme.settings.background = Some(Color {
+        r: 0xff,
+        g: 0xee,
+        b: 0xff,
+        a: 0xff,
+    });
+    theme_set.themes.insert(DOCS_THEME.to_string(), theme);
+    theme_set
+}
diff --git a/docs/src/lib.rs b/docs/src/lib.rs
index f2e72dd..f3d6113 100644
--- a/docs/src/lib.rs
+++ b/docs/src/lib.rs
@@ -1,3 +1,4 @@
+pub mod highlight;
 pub mod page;
 pub mod string_writer;
 pub mod views;
diff --git a/docs/src/views.rs b/docs/src/views.rs
index af1035a..7f21efd 100644
--- a/docs/src/views.rs
+++ b/docs/src/views.rs
@@ -3,50 +3,11 @@ use maud::{html, Markup, PreEscaped, Render, DOCTYPE};
 use std::str;
 
 use crate::{
+    highlight::Highlighter,
     page::{default_comrak_options, Page},
     string_writer::StringWriter,
 };
 
-struct Comrak<'a>(&'a AstNode<'a>);
-
-impl<'a> Render for Comrak<'a> {
-    fn render_to(&self, buffer: &mut String) {
-        comrak::format_html(self.0, &default_comrak_options(), &mut StringWriter(buffer)).unwrap();
-    }
-}
-
-/// Hack! The page title is wrapped in a `Paragraph` node, which introduces an
-/// extra `<p>` tag that we don't want most of the time.
-struct ComrakRemovePTags<'a>(&'a AstNode<'a>);
-
-impl<'a> Render for ComrakRemovePTags<'a> {
-    fn render(&self) -> Markup {
-        let mut buffer = String::new();
-        comrak::format_html(
-            self.0,
-            &default_comrak_options(),
-            &mut StringWriter(&mut buffer),
-        )
-        .unwrap();
-        assert!(buffer.starts_with("<p>") && buffer.ends_with("</p>\n"));
-        PreEscaped(
-            buffer
-                .trim_start_matches("<p>")
-                .trim_end_matches("</p>\n")
-                .to_string(),
-        )
-    }
-}
-
-struct ComrakText<'a>(&'a AstNode<'a>);
-
-impl<'a> Render for ComrakText<'a> {
-    fn render_to(&self, buffer: &mut String) {
-        comrak::format_commonmark(self.0, &default_comrak_options(), &mut StringWriter(buffer))
-            .unwrap();
-    }
-}
-
 pub fn main<'a>(
     slug: &str,
     page: Page<'a>,
@@ -124,3 +85,60 @@ pub fn main<'a>(
         }
     }
 }
+
+/// Renders a comrak AST node as HTML, with fenced code blocks syntax-highlighted.
+struct Comrak<'a>(&'a AstNode<'a>);
+
+impl<'a> Render for Comrak<'a> {
+    fn render_to(&self, buffer: &mut String) {
+        render_html(self.0, buffer);
+    }
+}
+
+/// Hack! The page title is wrapped in a `Paragraph` node, which introduces an
+/// extra `<p>` tag that we don't want most of the time.
+///
+/// Renders the node as HTML and strips exactly one leading `<p>` and one trailing `</p>\n`.
+struct ComrakRemovePTags<'a>(&'a AstNode<'a>);
+
+impl<'a> Render for ComrakRemovePTags<'a> {
+    /// # Panics
+    ///
+    /// Panics if the rendered HTML does not start with `<p>` and end with `</p>\n`.
+    fn render(&self) -> Markup {
+        let mut buffer = String::new();
+        render_html(self.0, &mut buffer);
+        let inner = buffer
+            .strip_prefix("<p>")
+            .and_then(|rest| rest.strip_suffix("</p>\n"))
+            .expect("rendered HTML should be wrapped in a <p> element");
+        PreEscaped(inner.to_string())
+    }
+}
+
+/// Renders a comrak AST node as CommonMark text.
+struct ComrakText<'a>(&'a AstNode<'a>);
+
+impl<'a> Render for ComrakText<'a> {
+    fn render_to(&self, buffer: &mut String) {
+        comrak::format_commonmark(self.0, &default_comrak_options(), &mut StringWriter(buffer))
+            .unwrap();
+    }
+}
+
+/// Writes the HTML rendering of `node` into `buffer` through a `StringWriter`,
+/// with the shared [`Highlighter`] plugged in for fenced code blocks.
+///
+/// # Panics
+///
+/// Panics if comrak reports an error while formatting.
+fn render_html<'a>(node: &'a AstNode<'a>, buffer: &mut String) {
+    let highlighter = Highlighter::get();
+    comrak::format_html_with_plugins(
+        node,
+        &default_comrak_options(),
+        &mut StringWriter(buffer),
+        &highlighter.as_plugins(),
+    )
+    .unwrap();
+}