From 76328b1334e1429e1a2719af2d041ac5fa86dd0e Mon Sep 17 00:00:00 2001 From: Federico Igne Date: Sun, 28 Aug 2022 17:07:34 +0100 Subject: feat(source): provide tangled code alongside the literate program On one hand this defeats the purpose of having the literate program, but it avoids the awkward "bootstrapping" experience. --- v0.1.0/Cargo.toml | 14 +++++ v0.1.0/src/main.rs | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 184 insertions(+) create mode 100644 v0.1.0/Cargo.toml create mode 100644 v0.1.0/src/main.rs (limited to 'v0.1.0') diff --git a/v0.1.0/Cargo.toml b/v0.1.0/Cargo.toml new file mode 100644 index 0000000..50660af --- /dev/null +++ b/v0.1.0/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "pangler" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +lazy_static = "1.4" +regex = "1.5" +pandoc = "0.8" +pandoc_ast = "0.8" +clap = { version = "3.1", features = ["derive"] } + diff --git a/v0.1.0/src/main.rs b/v0.1.0/src/main.rs new file mode 100644 index 0000000..ea4e4f7 --- /dev/null +++ b/v0.1.0/src/main.rs @@ -0,0 +1,170 @@ +use clap::Parser; +use lazy_static::lazy_static; +use pandoc::{InputFormat,InputKind,OutputFormat,OutputKind,Pandoc}; +use pandoc_ast::Block; +use regex::{Captures,Regex}; +use std::borrow::Cow; +use std::collections::HashMap; +use std::fs; +use std::io::Result; +use std::path::{Path,PathBuf}; + +const BASE: &str = "code"; + +type Blocks<'a> = HashMap>; + +/// A tangler for Literate Programming in Pandoc +#[derive(Parser, Debug)] +#[clap(author, version, about, long_about = None)] +struct Config { + /// Maximum substitution depth + #[clap(short, long, default_value_t = 10)] + depth: u32, + + /// Base output directory [default: './code'] + #[clap(short, long)] + output: Option, + + /// Input files + input: Vec, +} + +/* Write code to target file */ +fn write_to_file>(base: &Option, path: P, content: &str) -> std::io::Result<()> { + if path.as_ref().is_relative() { + let path = base.clone().unwrap_or(PathBuf::from(BASE)).join(path); + /* There is always *at least* the base directory as a parent */ + fs::create_dir_all(path.parent().unwrap())?; + fs::write(path, content)?; + } else { + eprintln!("Absolute paths not supported: {}", path.as_ref().to_string_lossy()) + } + Ok(()) +} + +/* Indent block of code */ +fn indent<'a>(input: Cow<'a,str>, indent: usize) -> Cow<'a,str> { + if indent > 0 { + let prefix = format!("{:indent$}", ""); + let mut output = String::with_capacity(input.len() + indent*input.lines().count()); + input.lines().enumerate().for_each(|(i,line)| { + if i > 0 { + output.push('\n'); + } + if !line.is_empty() { + output.push_str(&prefix); + output.push_str(line); + } + }); + Cow::Owned(output) + } else { + input + } +} + +/* + * Here are some notes on the following function + * + * lazy_static! { + * static ref MACRO: Regex = Regex::new(r"regex").unwrap(); + * } + * + * let mut text = Cow::from("This is some text..."); + * while MACRO.is_match(&text) { + * text = MACRO.replace_all(&text, _closure); + * } + * + * The problem with this version is that due to how `Cow` works, the value returned by + * `replace_all` cannot live more than the borrowed `text` passed as a parameter. This is + * because the function returns a reference to `text` (Cow::Borrowed) if no replacement takes + * place, so for the returned value to be valid, `text` still needs to be available. + * But text gets overridden right away, so, in principle, if no replacement takes place `text` + * gets overridden by a reference to it (losing data). + * + * Note that this doesn't happen in practice (but the compiler doesn't know about this) because + * the `replace_all` function is applied as long as some replacement is possible (`while` + * condition). In other words, all calls to `replace_all` always return an `Cow::Owned` value. + * + * This is how you would solve the problem instead: + * + * while let Cow::Owned(new_text) = MACRO.replace_all(&text, _closure) { + * text = Cow::from(new_text); + * } + * + * In this case, the matched `Cow::Owned` is not concerned by any lifetime (type is `Cow<'_,str>`) + * of the borrowed value `text`. Moreover `text` takes ownership of `new_text: String` using + * the `Cow::from()` function. No heap allocation is performed, and the string is not copied. + */ +fn build(base: &Option, blocks: &Blocks, depth: u32) { + lazy_static! { + static ref PATH: Regex = Regex::new(r"^(?:[[:word:]\.-]+/)*[[:word:]\.-]+\.[[:alpha:]]+$").unwrap(); + static ref MACRO: Regex = Regex::new(r"(?m)^([[:blank:]]*)<<([^>\s]+)>>").unwrap(); + } + blocks.iter().for_each(|(k,v)| if PATH.is_match(k) { + let mut d = 0; + let mut code = v.clone(); // No clone is happening because the value is a `Borrowed` + // Here `replace_all` returns a `Owned` value only when a replacement takes place. + // We can use it to recursively build blocks of code until no more substitutions are + // necessary (i.e., `replace_all` returns a `Borrowed`). + while let Cow::Owned(step) = MACRO.replace_all(&code, |caps: &Captures| { + let block = if d < depth { + blocks.get(&caps[2]).expect("Block not present").clone() + } else { + eprintln!("Reached maximum depth, output might be truncated. Increase `--depth` accordingly."); + Cow::Owned(String::from("")) + }; + indent(block, caps[1].len()) + }) { + code = Cow::from(step); + d += 1; + } + write_to_file(base, k, &code).expect("Unable to write to file"); + }) +} + +fn main() -> Result<()> { + let config = Config::parse(); + let mut pandoc = Pandoc::new(); + /* Pandoc input setup */ + pandoc.set_input(InputKind::Files(config.input)); + pandoc.set_input_format(InputFormat::Markdown, vec![]); + /* Pandoc output setup */ + pandoc.set_output(OutputKind::Pipe); + pandoc.set_output_format(OutputFormat::Json, vec![]); + /* Process literate program */ + pandoc.add_filter(move |json| pandoc_ast::filter(json, |pandoc| { + let mut blocks: Blocks = HashMap::new(); + pandoc.blocks.iter().for_each(|block| + if let Block::CodeBlock((id,classes,attrs), code) = block { + /* Only process blocks with an ID */ + if !id.is_empty() { + let key = { + if let Some(path) = attrs.iter().find(|(k,_)| k == "path") { + format!("{}{}", path.1, id) + } else { + id.to_string() + } + }; + /* Insert (or replace) block of code. In case of ID clash, the standard + * behaviour is to append the new code to the existing snippet. Use the class + * `.override` to override the previously encountered snippets. + */ + if classes.iter().any(|c| c == "override") { + blocks.insert(key, Cow::from(code)); + } else { + blocks.entry(key) + .and_modify(|s| { *s += "\n"; *s += Cow::from(code) }) + .or_insert(Cow::from(code)); + } + } else { + eprintln!("Ignoring codeblock without ID:"); + eprintln!("{}", indent(Cow::from(code),4)); + } + } + ); + build(&config.output, &blocks, config.depth); + pandoc + })); + pandoc.execute().unwrap(); + Ok(()) +} -- cgit v1.2.3