1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
|
use clap::Parser;
use lazy_static::lazy_static;
use pandoc::{InputFormat,InputKind,OutputFormat,OutputKind,Pandoc};
use pandoc_ast::Block;
use regex::{Captures,Regex};
use std::borrow::Cow;
use std::collections::HashMap;
use std::fs;
use std::io::Result;
use std::path::{Path,PathBuf};
const BASE: &str = "code";
type Blocks<'a> = HashMap<String,Cow<'a,str>>;
// Command-line configuration, parsed in `main` through `Config::parse()`.
// NOTE(review): the `///` comments below are presumably picked up by clap's derive as the
// user-facing about/help text, so treat them as runtime strings when editing — confirm.
/// A tangler for Literate Programming in Pandoc
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct Config {
// Upper bound on the number of substitution passes `build` runs per output file
// (compared against its pass counter); defaults to 10.
/// Maximum substitution depth
#[clap(short, long, default_value_t = 10)]
depth: u32,
// Directory that relative output paths are joined onto; when absent, `write_to_file`
// falls back to the `BASE` constant.
/// Base output directory [default: './code']
#[clap(short, long)]
output: Option<PathBuf>,
// Files handed to Pandoc as its input (`InputKind::Files`) in `main`.
/// Input files
input: Vec<PathBuf>,
}
/// Write `content` to `path`, resolved against the output directory.
///
/// A relative `path` is joined onto `base`, or onto `BASE` when no base directory was
/// given. Missing parent directories are created before the file is written.
/// Absolute paths are not supported: they are reported on stderr and skipped, and the
/// call still returns `Ok(())`.
///
/// # Errors
///
/// Returns any I/O error raised while creating the directories or writing the file.
fn write_to_file<P: AsRef<Path>>(base: &Option<PathBuf>, path: P, content: &str) -> std::io::Result<()> {
    let path = path.as_ref();
    if !path.is_relative() {
        eprintln!("Absolute paths not supported: {}", path.to_string_lossy());
        return Ok(());
    }
    // Borrow the configured base (or the default) rather than cloning the `PathBuf`
    // and allocating a fallback on every call.
    let target = base.as_deref().unwrap_or(Path::new(BASE)).join(path);
    // A joined path normally has a parent; avoid panicking if it does not.
    if let Some(parent) = target.parent() {
        fs::create_dir_all(parent)?;
    }
    fs::write(&target, content)
}
/// Prefix every non-empty line of `input` with `indent` spaces.
///
/// With `indent == 0` the input is handed back untouched. Otherwise a new string is
/// built in which lines are joined with `'\n'`; empty lines stay empty, and a trailing
/// line terminator of the input is not reproduced.
fn indent<'a>(input: Cow<'a, str>, indent: usize) -> Cow<'a, str> {
    if indent == 0 {
        return input;
    }
    let padding = " ".repeat(indent);
    let mut indented = String::with_capacity(input.len() + indent * input.lines().count());
    for (index, line) in input.lines().enumerate() {
        // Separator goes before every line except the first.
        if index > 0 {
            indented.push('\n');
        }
        if line.is_empty() {
            continue;
        }
        indented.push_str(&padding);
        indented.push_str(line);
    }
    Cow::Owned(indented)
}
/*
* Here are some notes on the following function
*
* lazy_static! {
* static ref MACRO: Regex = Regex::new(r"regex").unwrap();
* }
*
* let mut text = Cow::from("This is some text...");
* while MACRO.is_match(&text) {
* text = MACRO.replace_all(&text, _closure);
* }
*
* The problem with this version is that, due to how `Cow` works, the value returned by
* `replace_all` cannot outlive the borrowed `text` passed as a parameter. This is
* because the function returns a reference to `text` (`Cow::Borrowed`) if no replacement takes
* place, so for the returned value to be valid, `text` still needs to be available.
* But `text` gets overwritten right away, so, in principle, if no replacement takes place, `text`
* gets overwritten by a reference to itself (losing data).
*
* Note that this doesn't happen in practice (but the compiler doesn't know about this) because
* the `replace_all` function is only applied as long as some replacement is possible (`while`
* condition). In other words, every call to `replace_all` returns a `Cow::Owned` value.
*
* This is how you would solve the problem instead:
*
* while let Cow::Owned(new_text) = MACRO.replace_all(&text, _closure) {
* text = Cow::from(new_text);
* }
*
* In this case, the matched `Cow::Owned` is not concerned by any lifetime (type is `Cow<'_,str>`)
* of the borrowed value `text`. Moreover `text` takes ownership of `new_text: String` using
* the `Cow::from()` function. No heap allocation is performed, and the string is not copied.
*/
/// Expand every block whose name looks like a file path and write it to disk.
///
/// For each entry of `blocks` whose key matches `PATH`, references of the form
/// `<<name>>` that start a line (optionally preceded by blanks) are replaced by the
/// block called `name`, indented by the width of those leading blanks. Substitution
/// passes are repeated until nothing is left to expand. Once `depth` passes have run,
/// any remaining reference is replaced by an empty string and a warning is printed.
/// The result is written through `write_to_file` relative to `base`.
///
/// # Panics
///
/// Panics if a reference names a block that is not in `blocks` (the message names both
/// the missing block and the file being built), or if the file cannot be written.
fn build(base: &Option<PathBuf>, blocks: &Blocks, depth: u32) {
    lazy_static! {
        static ref PATH: Regex = Regex::new(r"^(?:[[:word:]\.-]+/)*[[:word:]\.-]+\.[[:alpha:]]+$").unwrap();
        static ref MACRO: Regex = Regex::new(r"(?m)^([[:blank:]]*)<<([^>\s]+)>>").unwrap();
    }
    for (target, source) in blocks {
        // Only blocks named like a file path are tangled into files.
        if !PATH.is_match(target) {
            continue;
        }
        let mut d = 0;
        // Cheap for a `Borrowed` value; an `Owned` value is copied here.
        let mut code = source.clone();
        // `replace_all` returns an `Owned` value only when a replacement took place, so the
        // loop keeps expanding until a pass leaves the text untouched (`Borrowed`).
        while let Cow::Owned(step) = MACRO.replace_all(&code, |caps: &Captures| {
            let block = if d < depth {
                blocks
                    .get(&caps[2])
                    .unwrap_or_else(|| {
                        panic!(
                            "Block not present: `{}` (referenced while building `{}`)",
                            &caps[2], target
                        )
                    })
                    .clone()
            } else {
                eprintln!("Reached maximum depth, output might be truncated. Increase `--depth` accordingly.");
                Cow::Owned(String::from(""))
            };
            // The match includes the leading blanks, so re-indent every line of the block.
            indent(block, caps[1].len())
        }) {
            code = Cow::from(step);
            d += 1;
        }
        write_to_file(base, target, &code).expect("Unable to write to file");
    }
}
/// Program entry point.
///
/// Parses the command line, runs Pandoc over the input files (read as Markdown, emitted
/// as JSON on a pipe) and installs a filter that collects the identified code blocks
/// and passes them to `build`. The document itself is returned by the filter unchanged.
fn main() -> Result<()> {
    let config = Config::parse();
    let mut pandoc = Pandoc::new();

    // Input side: the files given on the command line, read as Markdown.
    pandoc.set_input(InputKind::Files(config.input));
    pandoc.set_input_format(InputFormat::Markdown, vec![]);

    // Output side: the JSON AST, delivered through a pipe.
    pandoc.set_output(OutputKind::Pipe);
    pandoc.set_output_format(OutputFormat::Json, vec![]);

    // Tangle the literate program while the AST passes through the filter.
    pandoc.add_filter(move |json| {
        pandoc_ast::filter(json, |document| {
            let mut blocks: Blocks = HashMap::new();
            for block in document.blocks.iter() {
                if let Block::CodeBlock((id, classes, attrs), code) = block {
                    // Blocks without an ID cannot be referenced, so they are only reported.
                    if id.is_empty() {
                        eprintln!("Ignoring codeblock without ID:");
                        eprintln!("{}", indent(Cow::from(code), 4));
                        continue;
                    }

                    // An optional `path` attribute is prepended to the ID to form the key.
                    let key = match attrs.iter().find(|(k, _)| k == "path") {
                        Some(path) => format!("{}{}", path.1, id),
                        None => id.to_string(),
                    };

                    // On an ID clash the new code is appended to the existing snippet,
                    // unless the block carries the `.override` class, in which case it
                    // replaces whatever was collected before.
                    let replaces_previous = classes.iter().any(|c| c == "override");
                    if replaces_previous {
                        blocks.insert(key, Cow::from(code));
                    } else {
                        blocks
                            .entry(key)
                            .and_modify(|existing| {
                                *existing += "\n";
                                *existing += Cow::from(code);
                            })
                            .or_insert_with(|| Cow::from(code));
                    }
                }
            }
            build(&config.output, &blocks, config.depth);
            document
        })
    });

    pandoc.execute().unwrap();
    Ok(())
}
|