use std::io::Result; use std::borrow::Cow; use std::path::PathBuf; use std::collections::HashMap; use lazy_static::lazy_static; use regex::{Captures,Regex}; use pandoc::{InputFormat,InputKind,OutputFormat,OutputKind,Pandoc}; use pandoc_ast::Block; type Blocks<'a> = HashMap>; /* * Here are some notes on the following function * * lazy_static! { * static ref MACRO: Regex = Regex::new(r"regex").unwrap(); * } * * let mut text = Cow::from("This is some text..."); * * The problem with this version is that due to how `Cow` works, the value returned by * `replace_all` cannot live more than the borrowed `text` passed as a parameter. This is * because the function returns a reference to `text` (Cow::Borrowed) if no replacement takes * place, so for the returned value to be valid, `text` still needs to be available. * But text gets overridden right away, so, in principle, if no replacement takes place `text` * gets overridden by a reference to it (losing data). * * Note that this doesn't happen in practice (but the compiler doesn't know about this) because * the `replace_all` function is applied as long as some replacement is possible (`while` * condition). In other words, all calls to `replace_all` always return an `Cow::Owned` value. * * while MACRO.is_match(&text) { * text = MACRO.replace_all(&text, _closure); * } * * This is how you would solve the problem instead: * * while let Cow::Owned(new_text) = MACRO.replace_all(&text, _closure) { * text = Cow::from(new_text); * } * * In this case, the matched `Cow::Owned` is not concerned by any lifetime (type is `Cow<'_,str>`) * of the borrowed value `text`. Moreover `text` takes ownership of `new_text: String` using * the `Cow::from()` function. No heap allocation is performed, and the string is not copied. * * println!("{}", text) * */ fn build(blocks: &Blocks) { lazy_static! { static ref PATH: Regex = Regex::new(r"^(?:[[:word:]\.-]+/)*[[:word:]\.-]+\.[[:alpha:]]+$").unwrap(); static ref MACRO: Regex = Regex::new(r"(?m)^([[:blank:]]*)<<([^>\s]+)>>").unwrap(); } blocks.iter().for_each(|(k,v)| if PATH.is_match(k) { let mut code = v.clone(); // No clone is happening because the value is a `Borrowed` // Here `replace_all` returns a `Owned` value only when a replacement takes place. // We can use it to recursively build blocks of code until no more substitutions are // necessary (i.e., `replace_all` returns a `Borrowed`). while let Cow::Owned(step) = MACRO.replace_all(&code, |caps: &Captures| { let indent = caps[1].len(); blocks.get(&caps[2]) .expect("Block not present") .lines() .map(|l| format!("{:indent$}{}", "", l) ) .collect::>() .join("\n") } ) { code = Cow::from(step); } println!("[[{}]]\n{}", k, code); }) } fn main() -> Result<()> { let mut pandoc = Pandoc::new(); /* Pandoc input setup */ pandoc.set_input(InputKind::Files(vec![PathBuf::from("test.md")])); pandoc.set_input_format(InputFormat::Markdown, vec![]); /* Pandoc output setup */ pandoc.set_output(OutputKind::Pipe); pandoc.set_output_format(OutputFormat::Json, vec![]); /* Process literate program */ pandoc.add_filter(|json| pandoc_ast::filter(json, |pandoc| { let mut blocks: Blocks = HashMap::new(); pandoc.blocks.iter().for_each(|block| if let Block::CodeBlock((id,classes,attrs), code) = block { // dbg!(block); if !id.is_empty() { let mut key = attrs.iter() .find_map(|(k,v)| if k == "path" { Some(v.clone()) } else { None }) .unwrap_or(String::from("")); key.push_str(id); /* Insert (or replace) block of code. */ if classes.iter().any(|c| c == "override") { blocks.insert(key, Cow::from(code)); } else { blocks.entry(key) .and_modify(|s| { *s += "\n"; *s += Cow::from(code) }) .or_insert(Cow::from(code)); } } else { // println!("The following code has no ID:"); // code.lines().for_each(|l| println!(" {}", l)); } } ); build(dbg!(&blocks)); pandoc })); pandoc.execute().unwrap(); Ok(()) }