Switch to use Comrak for syntax highlighting

This commit is contained in:
Chris Wong 2024-08-22 12:58:41 +10:00 committed by GitHub
parent f0d11ce147
commit a3c8ea3b4d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 127 additions and 91 deletions

27
docs/Cargo.lock generated
View file

@ -32,6 +32,21 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "bit-set"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "1.3.2" version = "1.3.2"
@ -82,6 +97,7 @@ dependencies = [
"once_cell", "once_cell",
"regex", "regex",
"slug", "slug",
"syntect",
"typed-arena", "typed-arena",
"unicode_categories", "unicode_categories",
] ]
@ -198,6 +214,16 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "fancy-regex"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2"
dependencies = [
"bit-set",
"regex",
]
[[package]] [[package]]
name = "flate2" name = "flate2"
version = "1.0.32" version = "1.0.32"
@ -519,6 +545,7 @@ checksum = "874dcfa363995604333cf947ae9f751ca3af4522c60886774c4963943b4746b1"
dependencies = [ dependencies = [
"bincode", "bincode",
"bitflags", "bitflags",
"fancy-regex",
"flate2", "flate2",
"fnv", "fnv",
"once_cell", "once_cell",

View file

@ -11,7 +11,7 @@ description = "Documentation for Maud."
edition = "2021" edition = "2021"
[dependencies] [dependencies]
comrak = { version = "*", default-features = false } comrak = { version = "*", default-features = false, features = ["syntect"] }
maud = { path = "../maud" } maud = { path = "../maud" }
serde_json = "*" serde_json = "*"
syntect = "*" syntect = "*"

View file

@ -1,5 +1,5 @@
use comrak::{ use comrak::{
nodes::{AstNode, NodeCodeBlock, NodeHeading, NodeHtmlBlock, NodeLink, NodeValue}, nodes::{AstNode, NodeCodeBlock, NodeHeading, NodeLink, NodeValue},
Arena, Arena,
}; };
use docs::{ use docs::{
@ -14,11 +14,6 @@ use std::{
path::Path, path::Path,
str, str,
}; };
use syntect::{
highlighting::{Color, ThemeSet},
html::highlighted_html_for_string,
parsing::SyntaxSet,
};
fn main() -> Result<(), Box<dyn Error>> { fn main() -> Result<(), Box<dyn Error>> {
let args = env::args().collect::<Vec<_>>(); let args = env::args().collect::<Vec<_>>();
@ -55,7 +50,7 @@ fn build_page(
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let page = Page::load(&arena, input_path)?; let page = Page::load(&arena, input_path)?;
postprocess(page.content)?; postprocess(page.content);
let markup = views::main(slug, page, &nav, version, hash); let markup = views::main(slug, page, &nav, version, hash);
@ -65,12 +60,10 @@ fn build_page(
Ok(()) Ok(())
} }
fn postprocess<'a>(content: &'a AstNode<'a>) -> Result<(), Box<dyn Error>> { fn postprocess<'a>(content: &'a AstNode<'a>) {
lower_headings(content); lower_headings(content);
rewrite_md_links(content); rewrite_md_links(content);
strip_hidden_code(content); strip_hidden_code(content);
highlight_code(content)?;
Ok(())
} }
fn lower_headings<'a>(root: &'a AstNode<'a>) { fn lower_headings<'a>(root: &'a AstNode<'a>) {
@ -98,8 +91,7 @@ fn strip_hidden_code<'a>(root: &'a AstNode<'a>) {
for node in root.descendants() { for node in root.descendants() {
let mut data = node.data.borrow_mut(); let mut data = node.data.borrow_mut();
if let NodeValue::CodeBlock(NodeCodeBlock { info, literal, .. }) = &mut data.value { if let NodeValue::CodeBlock(NodeCodeBlock { info, literal, .. }) = &mut data.value {
let info = parse_code_block_info(info); if info.split(',').map(str::trim).all(|lang| lang != "rust") {
if !info.contains(&"rust") {
continue; continue;
} }
*literal = strip_hidden_code_inner(literal); *literal = strip_hidden_code_inner(literal);
@ -117,41 +109,3 @@ fn strip_hidden_code_inner(literal: &str) -> String {
.collect::<Vec<_>>(); .collect::<Vec<_>>();
lines.join("\n") lines.join("\n")
} }
/// Replaces every code block under `root` with an HTML block containing
/// Syntect-highlighted markup.
///
/// The syntax is picked from the first token of the block's comma-separated
/// info string that Syntect recognizes; if none matches, plain text is used.
/// Highlighting uses a copy of the `InspiredGitHub` theme whose background is
/// overridden with `#ffeeff`.
///
/// # Errors
///
/// Propagates any error returned by `highlighted_html_for_string`.
///
/// # Panics
///
/// Panics if the default theme set has no `InspiredGitHub` entry.
fn highlight_code<'a>(root: &'a AstNode<'a>) -> Result<(), Box<dyn Error>> {
    let ss = SyntaxSet::load_defaults_newlines();
    let ts = ThemeSet::load_defaults();
    let mut theme = ts.themes["InspiredGitHub"].clone();
    theme.settings.background = Some(Color {
        r: 0xff,
        g: 0xee,
        b: 0xff,
        a: 0xff,
    });
    for node in root.descendants() {
        let mut data = node.data.borrow_mut();
        if let NodeValue::CodeBlock(NodeCodeBlock { info, literal, .. }) = &mut data.value {
            // `find_map` stops at the first info token that names a known syntax.
            let syntax = parse_code_block_info(info)
                .into_iter()
                .find_map(|token| ss.find_syntax_by_token(token))
                .unwrap_or_else(|| ss.find_syntax_plain_text());
            // Move the source text out of the node; the node is replaced below anyway.
            let mut literal = std::mem::take(literal);
            if !literal.ends_with('\n') {
                // Syntect expects a trailing newline
                literal.push('\n');
            }
            let html = highlighted_html_for_string(&literal, &ss, syntax, &theme)?;
            data.value = NodeValue::HtmlBlock(NodeHtmlBlock {
                literal: html,
                ..Default::default()
            });
        }
    }
    Ok(())
}
/// Splits a code block info string on commas and trims surrounding
/// whitespace from each piece.
///
/// Empty pieces are kept, so an empty input yields a single empty token.
fn parse_code_block_info(info: &str) -> Vec<&str> {
    let mut tokens = Vec::new();
    for piece in info.split(',') {
        tokens.push(piece.trim());
    }
    tokens
}

44
docs/src/highlight.rs Normal file
View file

@ -0,0 +1,44 @@
use comrak::{
plugins::syntect::{SyntectAdapter, SyntectAdapterBuilder},
Plugins,
};
use std::rc::Rc;
use syntect::highlighting::{Color, ThemeSet};
/// Handle to the Syntect adapter that is installed as Comrak's code fence
/// syntax highlighter.
pub struct Highlighter {
/// Reference-counted pointer to the adapter; `get` clones it from the
/// `SYNTECT_ADAPTER` thread-local.
adapter: Rc<SyntectAdapter>,
}
impl Highlighter {
pub fn get() -> Self {
Self {
adapter: SYNTECT_ADAPTER.with(Rc::clone),
}
}
pub fn as_plugins(&self) -> Plugins<'_> {
let mut plugins = Plugins::default();
plugins.render.codefence_syntax_highlighter = Some(&*self.adapter);
plugins
}
}
thread_local! {
    /// Per-thread Syntect adapter configured with the `InspiredGitHub2` theme:
    /// a copy of the default `InspiredGitHub` theme with its background
    /// overridden to `#ffeeff`.
    static SYNTECT_ADAPTER: Rc<SyntectAdapter> = {
        // Register the customized theme next to the defaults under a new name.
        let mut theme_set = ThemeSet::load_defaults();
        let mut custom_theme = theme_set.themes["InspiredGitHub"].clone();
        custom_theme.settings.background = Some(Color {
            r: 0xff,
            g: 0xee,
            b: 0xff,
            a: 0xff,
        });
        theme_set
            .themes
            .insert("InspiredGitHub2".to_string(), custom_theme);

        let adapter = SyntectAdapterBuilder::new()
            .theme_set(theme_set)
            .theme("InspiredGitHub2")
            .build();
        Rc::new(adapter)
    };
}

View file

@ -1,3 +1,4 @@
pub mod highlight;
pub mod page; pub mod page;
pub mod string_writer; pub mod string_writer;
pub mod views; pub mod views;

View file

@ -3,50 +3,11 @@ use maud::{html, Markup, PreEscaped, Render, DOCTYPE};
use std::str; use std::str;
use crate::{ use crate::{
highlight::Highlighter,
page::{default_comrak_options, Page}, page::{default_comrak_options, Page},
string_writer::StringWriter, string_writer::StringWriter,
}; };
/// Wraps a Comrak AST node so it can be rendered as HTML inside Maud markup.
struct Comrak<'a>(&'a AstNode<'a>);

impl Render for Comrak<'_> {
    /// Formats the wrapped node as HTML into `buffer` through `StringWriter`.
    fn render_to(&self, buffer: &mut String) {
        let options = default_comrak_options();
        let mut writer = StringWriter(buffer);
        comrak::format_html(self.0, &options, &mut writer).unwrap();
    }
}
/// Hack! The page title is wrapped in a `Paragraph` node, which introduces an
/// extra `<p>` tag that we don't want most of the time.
///
/// Rendering this wrapper formats the node as HTML and removes exactly one
/// enclosing `<p>` … `</p>\n` pair from the result.
///
/// # Panics
///
/// Panics if formatting fails or if the rendered HTML is not wrapped in
/// `<p>` … `</p>\n`.
struct ComrakRemovePTags<'a>(&'a AstNode<'a>);

impl<'a> Render for ComrakRemovePTags<'a> {
    fn render(&self) -> Markup {
        let mut buffer = String::new();
        comrak::format_html(
            self.0,
            &default_comrak_options(),
            &mut StringWriter(&mut buffer),
        )
        .unwrap();
        // `strip_prefix`/`strip_suffix` remove a single occurrence, unlike
        // `trim_*_matches`, which would also eat repeated tags in the content.
        let inner = buffer
            .strip_prefix("<p>")
            .and_then(|rest| rest.strip_suffix("</p>\n"))
            .expect("rendered HTML should be wrapped in `<p>`...`</p>\\n`");
        PreEscaped(inner.to_string())
    }
}
/// Wraps a Comrak AST node so it can be rendered back to CommonMark text.
struct ComrakText<'a>(&'a AstNode<'a>);

impl Render for ComrakText<'_> {
    /// Formats the wrapped node as CommonMark into `buffer` through
    /// `StringWriter`.
    fn render_to(&self, buffer: &mut String) {
        let options = default_comrak_options();
        let mut writer = StringWriter(buffer);
        comrak::format_commonmark(self.0, &options, &mut writer).unwrap();
    }
}
pub fn main<'a>( pub fn main<'a>(
slug: &str, slug: &str,
page: Page<'a>, page: Page<'a>,
@ -124,3 +85,52 @@ pub fn main<'a>(
} }
} }
} }
/// Wraps a Comrak AST node so it can be rendered as HTML inside Maud markup,
/// with the `Highlighter` plugins applied.
struct Comrak<'a>(&'a AstNode<'a>);

impl Render for Comrak<'_> {
    /// Formats the wrapped node as HTML into `buffer` through `StringWriter`.
    fn render_to(&self, buffer: &mut String) {
        let highlighter = Highlighter::get();
        let options = default_comrak_options();
        let mut writer = StringWriter(buffer);
        let plugins = highlighter.as_plugins();
        comrak::format_html_with_plugins(self.0, &options, &mut writer, &plugins).unwrap();
    }
}
/// Hack! The page title is wrapped in a `Paragraph` node, which introduces an
/// extra `<p>` tag that we don't want most of the time.
///
/// Rendering this wrapper formats the node as HTML (with the `Highlighter`
/// plugins applied) and removes exactly one enclosing `<p>` … `</p>\n` pair
/// from the result.
///
/// # Panics
///
/// Panics if formatting fails or if the rendered HTML is not wrapped in
/// `<p>` … `</p>\n`.
struct ComrakRemovePTags<'a>(&'a AstNode<'a>);

impl<'a> Render for ComrakRemovePTags<'a> {
    fn render(&self) -> Markup {
        let mut buffer = String::new();
        let highlighter = Highlighter::get();
        comrak::format_html_with_plugins(
            self.0,
            &default_comrak_options(),
            &mut StringWriter(&mut buffer),
            &highlighter.as_plugins(),
        )
        .unwrap();
        // `strip_prefix`/`strip_suffix` remove a single occurrence, unlike
        // `trim_*_matches`, which would also eat repeated tags in the content.
        let inner = buffer
            .strip_prefix("<p>")
            .and_then(|rest| rest.strip_suffix("</p>\n"))
            .expect("rendered HTML should be wrapped in `<p>`...`</p>\\n`");
        PreEscaped(inner.to_string())
    }
}
/// Wraps a Comrak AST node so it can be rendered back to CommonMark text.
struct ComrakText<'a>(&'a AstNode<'a>);

impl Render for ComrakText<'_> {
    /// Formats the wrapped node as CommonMark into `buffer` through
    /// `StringWriter`.
    fn render_to(&self, buffer: &mut String) {
        let options = default_comrak_options();
        let mut writer = StringWriter(buffer);
        comrak::format_commonmark(self.0, &options, &mut writer).unwrap();
    }
}