use clap::Parser; use lazy_static::lazy_static; use pandoc::{InputFormat,InputKind,OutputFormat,OutputKind,Pandoc}; use pandoc_ast::Block; use regex::{Captures,Regex}; use std::borrow::Cow; use std::collections::HashMap; use std::fs; use std::io::Result; use std::path::{Path,PathBuf}; const BASE: &str = "code"; type Blocks<'a> = HashMap>; /// A tangler for Literate Programming in Pandoc #[derive(Parser, Debug)] #[clap(author, version, about, long_about = None)] struct Config { /// Maximum substitution depth #[clap(short, long, default_value_t = 10)] depth: u32, /// Base output directory [default: './code'] #[clap(short, long)] output: Option, /// Input files input: Vec, } /* Write code to target file */ fn write_to_file>(base: &Option, path: P, content: &str) -> std::io::Result<()> { if path.as_ref().is_relative() { let path = base.clone().unwrap_or(PathBuf::from(BASE)).join(path); /* There is always *at least* the base directory as a parent */ fs::create_dir_all(path.parent().unwrap())?; fs::write(path, content)?; } else { eprintln!("Absolute paths not supported: {}", path.as_ref().to_string_lossy()) } Ok(()) } /* Indent block of code */ fn indent<'a>(input: Cow<'a,str>, indent: usize) -> Cow<'a,str> { if indent > 0 { let prefix = format!("{:indent$}", ""); let mut output = String::with_capacity(input.len() + indent*input.lines().count()); input.lines().enumerate().for_each(|(i,line)| { if i > 0 { output.push('\n'); } if !line.is_empty() { output.push_str(&prefix); output.push_str(line); } }); Cow::Owned(output) } else { input } } /* * Here are some notes on the following function * * lazy_static! { * static ref MACRO: Regex = Regex::new(r"regex").unwrap(); * } * * let mut text = Cow::from("This is some text..."); * while MACRO.is_match(&text) { * text = MACRO.replace_all(&text, _closure); * } * * The problem with this version is that due to how `Cow` works, the value returned by * `replace_all` cannot live more than the borrowed `text` passed as a parameter. This is * because the function returns a reference to `text` (Cow::Borrowed) if no replacement takes * place, so for the returned value to be valid, `text` still needs to be available. * But text gets overridden right away, so, in principle, if no replacement takes place `text` * gets overridden by a reference to it (losing data). * * Note that this doesn't happen in practice (but the compiler doesn't know about this) because * the `replace_all` function is applied as long as some replacement is possible (`while` * condition). In other words, all calls to `replace_all` always return an `Cow::Owned` value. * * This is how you would solve the problem instead: * * while let Cow::Owned(new_text) = MACRO.replace_all(&text, _closure) { * text = Cow::from(new_text); * } * * In this case, the matched `Cow::Owned` is not concerned by any lifetime (type is `Cow<'_,str>`) * of the borrowed value `text`. Moreover `text` takes ownership of `new_text: String` using * the `Cow::from()` function. No heap allocation is performed, and the string is not copied. */ fn build(base: &Option, blocks: &Blocks, depth: u32) { lazy_static! { static ref PATH: Regex = Regex::new(r"^(?:[[:word:]\.-]+/)*[[:word:]\.-]+\.[[:alpha:]]+$").unwrap(); static ref MACRO: Regex = Regex::new(r"(?m)^([[:blank:]]*)<<([^>\s]+)>>").unwrap(); } blocks.iter().for_each(|(k,v)| if PATH.is_match(k) { let mut d = 0; let mut code = v.clone(); // No clone is happening because the value is a `Borrowed` // Here `replace_all` returns a `Owned` value only when a replacement takes place. // We can use it to recursively build blocks of code until no more substitutions are // necessary (i.e., `replace_all` returns a `Borrowed`). while let Cow::Owned(step) = MACRO.replace_all(&code, |caps: &Captures| { let block = if d < depth { blocks.get(&caps[2]).expect("Block not present").clone() } else { eprintln!("Reached maximum depth, output might be truncated. Increase `--depth` accordingly."); Cow::Owned(String::from("")) }; indent(block, caps[1].len()) }) { code = Cow::from(step); d += 1; } write_to_file(base, k, &code).expect("Unable to write to file"); }) } fn main() -> Result<()> { let config = Config::parse(); let mut pandoc = Pandoc::new(); /* Pandoc input setup */ pandoc.set_input(InputKind::Files(config.input)); pandoc.set_input_format(InputFormat::Markdown, vec![]); /* Pandoc output setup */ pandoc.set_output(OutputKind::Pipe); pandoc.set_output_format(OutputFormat::Json, vec![]); /* Process literate program */ pandoc.add_filter(move |json| pandoc_ast::filter(json, |pandoc| { let mut blocks: Blocks = HashMap::new(); pandoc.blocks.iter().for_each(|block| if let Block::CodeBlock((id,classes,attrs), code) = block { /* Only process blocks with an ID */ if !id.is_empty() { let key = { if let Some(path) = attrs.iter().find(|(k,_)| k == "path") { format!("{}{}", path.1, id) } else { id.to_string() } }; /* Insert (or replace) block of code. In case of ID clash, the standard * behaviour is to append the new code to the existing snippet. Use the class * `.override` to override the previously encountered snippets. */ if classes.iter().any(|c| c == "override") { blocks.insert(key, Cow::from(code)); } else { blocks.entry(key) .and_modify(|s| { *s += "\n"; *s += Cow::from(code) }) .or_insert(Cow::from(code)); } } else { eprintln!("Ignoring codeblock without ID:"); eprintln!("{}", indent(Cow::from(code),4)); } } ); build(&config.output, &blocks, config.depth); pandoc })); pandoc.execute().unwrap(); Ok(()) }