1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
use std::io::Result;
use std::borrow::Cow;
use std::path::PathBuf;
use std::collections::HashMap;
use lazy_static::lazy_static;
use regex::{Captures,Regex};
use pandoc::{InputFormat,InputKind,OutputFormat,OutputKind,Pandoc};
use pandoc_ast::Block;
/// Named code blocks collected from the document.
///
/// `main` builds each key from the block's `path` attribute (empty when absent)
/// followed by the block id; each value is the text of the code block, borrowed from
/// the document when possible and owned once several blocks have been concatenated.
type Blocks<'a> = HashMap<String,Cow<'a,str>>;
/*
* Here are some notes on the following function
*
* lazy_static! {
* static ref MACRO: Regex = Regex::new(r"regex").unwrap();
* }
*
* let mut text = Cow::from("This is some text...");
*
* The problem with the naive loop shown below is that, due to how `Cow` works, the value
* returned by `replace_all` cannot outlive the borrowed `text` passed as a parameter. This is
* because the function returns a reference to `text` (`Cow::Borrowed`) if no replacement takes
* place, so for the returned value to be valid, `text` still needs to be available.
* But `text` gets overwritten right away, so, in principle, if no replacement takes place
* `text` gets overwritten by a reference to itself (losing data).
*
* Note that this doesn't happen in practice (but the compiler doesn't know about this) because
* the `replace_all` function is applied only as long as some replacement is possible (`while`
* condition). In other words, every call to `replace_all` returns a `Cow::Owned` value.
*
* while MACRO.is_match(&text) {
* text = MACRO.replace_all(&text, _closure);
* }
*
* This is how you would solve the problem instead:
*
* while let Cow::Owned(new_text) = MACRO.replace_all(&text, _closure) {
* text = Cow::from(new_text);
* }
*
* In this case, the matched `Cow::Owned` is not tied to the lifetime of the borrowed value
* `text` (its type is `Cow<'_,str>`). Moreover, `text` takes ownership of `new_text: String`
* through `Cow::from()`. No heap allocation is performed, and the string is not copied.
*
* println!("{}", text)
*
*/
/// Expands and prints every block whose key looks like a file path.
///
/// A key counts as a file path when it matches `PATH` (slash-separated components
/// ending in `.<letters>`). Within such a block, every line that starts with optional
/// blanks followed by `<<name>>` is replaced by the contents of the block called
/// `name`, with the captured blanks prepended verbatim to each inserted line (so tab
/// indentation stays tab indentation). Substitution is repeated until no macro is left,
/// which lets blocks reference other blocks to any reasonable depth.
///
/// Each expanded block is written to stdout as `[[key]]` followed by its code.
///
/// # Panics
///
/// * if a macro names a block that is not present in `blocks`;
/// * if a block still contains macros after `MAX_PASSES` substitution passes, which
///   indicates a block that (directly or indirectly) references itself.
fn build(blocks: &Blocks) {
    lazy_static! {
        static ref PATH: Regex = Regex::new(r"^(?:[[:word:]\.-]+/)*[[:word:]\.-]+\.[[:alpha:]]+$").unwrap();
        static ref MACRO: Regex = Regex::new(r"(?m)^([[:blank:]]*)<<([^>\s]+)>>").unwrap();
    }

    // Every pass resolves one level of nesting, so this is far beyond what a real
    // document needs while still stopping a cyclic reference from looping forever.
    const MAX_PASSES: usize = 64;

    for (path, body) in blocks {
        if !PATH.is_match(path) {
            continue;
        }

        // Start from a borrow of the stored text: nothing is copied, even when the
        // stored value is itself a `Cow::Owned` (blocks that were concatenated).
        let mut code: Cow<str> = Cow::Borrowed(&**body);
        let mut passes = 0usize;

        // `replace_all` yields `Cow::Owned` only when at least one macro was replaced,
        // so the loop ends as soon as a pass finds nothing left to expand.
        while let Cow::Owned(step) = MACRO.replace_all(&code, |caps: &Captures| {
            let indent = &caps[1];
            let target = &caps[2];
            blocks
                .get(target)
                .unwrap_or_else(|| panic!("Block not present: {}", target))
                .lines()
                // Reuse the captured indentation itself rather than its length, so
                // that tabs are not turned into single spaces.
                .map(|line| format!("{}{}", indent, line))
                .collect::<Vec<_>>()
                .join("\n")
        }) {
            passes += 1;
            assert!(
                passes <= MAX_PASSES,
                "Block `{}` still contains macros after {} expansion passes; is a block referencing itself?",
                path,
                MAX_PASSES
            );
            code = Cow::from(step);
        }

        println!("[[{}]]\n{}", path, code);
    }
}
/// Runs pandoc on `test.md` and processes the document's code blocks.
///
/// Pandoc is configured to read Markdown and emit its JSON AST through a pipe. A filter
/// walks the top-level blocks of that AST, gathers every code block that has an id into
/// a `Blocks` map and hands the map to `build`; the document itself is passed through
/// unchanged.
///
/// # Errors
///
/// Returns an `io::Error` of kind `Other` carrying the debug representation of the
/// pandoc error when `Pandoc::execute` fails.
fn main() -> Result<()> {
    let mut pandoc = Pandoc::new();

    /* Pandoc input setup */
    pandoc.set_input(InputKind::Files(vec![PathBuf::from("test.md")]));
    pandoc.set_input_format(InputFormat::Markdown, vec![]);

    /* Pandoc output setup */
    pandoc.set_output(OutputKind::Pipe);
    pandoc.set_output_format(OutputFormat::Json, vec![]);

    /* Process literate program */
    pandoc.add_filter(|json| {
        pandoc_ast::filter(json, |doc| {
            // `blocks` borrows the code text from `doc`; keep it in its own scope so
            // those borrows are gone before `doc` is handed back.
            {
                let mut blocks: Blocks = HashMap::new();
                for block in doc.blocks.iter() {
                    if let Block::CodeBlock((id, classes, attrs), code) = block {
                        // Blocks without an id cannot be referenced, so they are skipped.
                        if id.is_empty() {
                            continue;
                        }

                        // Key = optional `path` attribute followed by the block id.
                        let mut key = attrs
                            .iter()
                            .find_map(|(k, v)| if k == "path" { Some(v.clone()) } else { None })
                            .unwrap_or_default();
                        key.push_str(id);

                        if classes.iter().any(|c| c == "override") {
                            /* Replace whatever was collected under this key so far. */
                            blocks.insert(key, Cow::from(code));
                        } else {
                            /* Append to an existing block, or start a new one. */
                            blocks
                                .entry(key)
                                .and_modify(|existing| {
                                    let text = existing.to_mut();
                                    text.push('\n');
                                    text.push_str(code);
                                })
                                .or_insert_with(|| Cow::from(code));
                        }
                    }
                }
                build(&blocks);
            }
            doc
        })
    });

    // Report a pandoc failure through the function's `Result` instead of panicking.
    pandoc.execute().map_err(|err| {
        std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", err))
    })?;
    Ok(())
}
|