From 76328b1334e1429e1a2719af2d041ac5fa86dd0e Mon Sep 17 00:00:00 2001 From: Federico Igne Date: Sun, 28 Aug 2022 17:07:34 +0100 Subject: feat(source): provide tangled code alongside the literate program On one hand this defeats the purpose of having the literate program, but it avoids the awkward "bootstrapping" experience. --- src/main.rs | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 src/main.rs (limited to 'src/main.rs') diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..02fbcd4 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,165 @@ +use clap::Parser; +use pandoc::{ + InputFormat,InputKind,OutputFormat,OutputKind,Pandoc +}; +use pandoc_ast::Block; +use std::borrow::Cow; +use std::collections::HashMap; +use lazy_static::lazy_static; +use regex::{Captures,Regex}; +use std::fs; +use std::io::Result; +use std::path::PathBuf; + +const BASE: &str = "./"; + +/// A tangler for Literate Programming in Pandoc +#[derive(Parser, Debug)] +#[clap(author, version, about, long_about = None)] +struct Config { + /// Maximum substitution depth + #[clap(short, long, default_value_t = 10)] + depth: u32, + /// Base output directory [default: './'] + #[clap(short, long)] + output: Option, + /// Input files + input: Vec, +} + +type Blocks<'a> = HashMap>; + +fn build( + base: &Option, + blocks: &Blocks, + max_depth: u32 +) { + lazy_static! { + static ref PATH: Regex = + Regex::new( + r"^(?:[[:word:]\.-]+/)*[[:word:]\.-]+\.[[:alpha:]]+$" + ).unwrap(); + static ref MACRO: Regex = + Regex::new( + r"(?m)^([[:blank:]]*)<<([^>\s]+)>>" + ).unwrap(); + } + blocks + .iter() + .for_each(|(path,code)| if PATH.is_match(path) { + let mut current_depth = 0; + let mut code = code.clone(); + while let Cow::Owned(new_code) = MACRO.replace_all( + &code, + |caps: &Captures| { + if current_depth < max_depth { + let block = blocks + .get(&caps[2]) + .expect("Block not present") + .clone(); + indent(block, caps[1].len()) + } else { + eprintln!("Reached maximum depth, \ + output might be truncated.\n\ + Increase `--depth` accordingly."); + Cow::Owned(String::from("")) + } + } + ) { + code = Cow::from(new_code); + current_depth += 1; + } + let file = base + .clone() + .unwrap_or(PathBuf::from(BASE)) + .join(path); + write_to_file(file, &code) + .expect("Unable to write to file"); + }) +} + +fn indent<'a>( + input: Cow<'a,str>, + indent: usize +) -> Cow<'a,str> { + if indent > 0 { + let prefix = format!("{:indent$}", ""); + let size = input.len() + indent*input.lines().count(); + let mut output = String::with_capacity(size); + input.lines().enumerate().for_each(|(i,line)| { + if i > 0 { + output.push('\n'); + } + if !line.is_empty() { + output.push_str(&prefix); + output.push_str(line); + } + }); + Cow::Owned(output) + } else { + input + } +} + +fn write_to_file( + path: PathBuf, content: &str +) -> std::io::Result<()> { + if path.is_relative() { + fs::create_dir_all(path.parent().unwrap())?; + fs::write(path, content)?; + } else { + eprintln!( + "Absolute paths not supported: {}", + path.to_string_lossy() + ) + } + Ok(()) +} + + +fn main() -> Result<()> { + let config = Config::parse(); + let mut pandoc = Pandoc::new(); + pandoc.set_input(InputKind::Files(config.input)); + pandoc.set_input_format(InputFormat::Markdown, vec![]); + pandoc.set_output(OutputKind::Pipe); + pandoc.set_output_format(OutputFormat::Json, vec![]); + pandoc.add_filter( + move |json| pandoc_ast::filter(json, + |pandoc| { + let mut blocks: Blocks = HashMap::new(); + pandoc.blocks.iter().for_each(|block| + if let Block::CodeBlock((id,clss,attrs), code) = block { + if !id.is_empty() { + let key = { + let path = attrs.iter().find(|(k,_)| k == "path"); + if let Some(path) = path { + format!("{}{}", path.1, id) + } else { + id.to_string() + } + }; + if clss.iter().any(|c| c == "override") { + blocks.insert(key, Cow::from(code)); + } else { + blocks.entry(key) + .and_modify(|s| { + *s += "\n"; + *s += Cow::from(code) + }) + .or_insert(Cow::from(code)); + } + } else { + eprintln!("Ignoring code block without ID:"); + eprintln!("{}", indent(Cow::from(code),4)); + } + } + ); + build(&config.output, &blocks, config.depth); + pandoc + } + ) + ); + pandoc.execute().unwrap(); + Ok(()) +} \ No newline at end of file -- cgit v1.2.3