diff options
author | Federico Igne <git@federicoigne.com> | 2022-08-28 19:22:50 +0100 |
---|---|---|
committer | Federico Igne <git@federicoigne.com> | 2022-08-28 19:22:50 +0100 |
commit | 6f8fa69e1ad8ad68c8d1afa455b414cd459bb498 (patch) | |
tree | 7630321003324fae99b789ea2f88102047eb3b70 | |
parent | 76328b1334e1429e1a2719af2d041ac5fa86dd0e (diff) | |
download | pangler-6f8fa69e1ad8ad68c8d1afa455b414cd459bb498.tar.gz pangler-6f8fa69e1ad8ad68c8d1afa455b414cd459bb498.zip |
refactor: remove old v0.1.0 sources
-rw-r--r-- | v0.1.0/Cargo.toml | 14 | ||||
-rw-r--r-- | v0.1.0/src/main.rs | 170 |
2 files changed, 0 insertions, 184 deletions
diff --git a/v0.1.0/Cargo.toml b/v0.1.0/Cargo.toml deleted file mode 100644 index 50660af..0000000 --- a/v0.1.0/Cargo.toml +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | [package] | ||
2 | name = "pangler" | ||
3 | version = "0.1.0" | ||
4 | edition = "2021" | ||
5 | |||
6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
7 | |||
8 | [dependencies] | ||
9 | lazy_static = "1.4" | ||
10 | regex = "1.5" | ||
11 | pandoc = "0.8" | ||
12 | pandoc_ast = "0.8" | ||
13 | clap = { version = "3.1", features = ["derive"] } | ||
14 | |||
diff --git a/v0.1.0/src/main.rs b/v0.1.0/src/main.rs deleted file mode 100644 index ea4e4f7..0000000 --- a/v0.1.0/src/main.rs +++ /dev/null | |||
@@ -1,170 +0,0 @@ | |||
1 | use clap::Parser; | ||
2 | use lazy_static::lazy_static; | ||
3 | use pandoc::{InputFormat,InputKind,OutputFormat,OutputKind,Pandoc}; | ||
4 | use pandoc_ast::Block; | ||
5 | use regex::{Captures,Regex}; | ||
6 | use std::borrow::Cow; | ||
7 | use std::collections::HashMap; | ||
8 | use std::fs; | ||
9 | use std::io::Result; | ||
10 | use std::path::{Path,PathBuf}; | ||
11 | |||
/* Output directory used by `write_to_file` when no base directory is supplied */
const BASE: &str = "code";

/* Code snippets keyed by name; values borrow from the parsed document where possible */
type Blocks<'a> = HashMap<String, Cow<'a, str>>;
15 | |||
16 | /// A tangler for Literate Programming in Pandoc | ||
17 | #[derive(Parser, Debug)] | ||
18 | #[clap(author, version, about, long_about = None)] | ||
19 | struct Config { | ||
20 | /// Maximum substitution depth | ||
21 | #[clap(short, long, default_value_t = 10)] | ||
22 | depth: u32, | ||
23 | |||
24 | /// Base output directory [default: './code'] | ||
25 | #[clap(short, long)] | ||
26 | output: Option<PathBuf>, | ||
27 | |||
28 | /// Input files | ||
29 | input: Vec<PathBuf>, | ||
30 | } | ||
31 | |||
32 | /* Write code to target file */ | ||
33 | fn write_to_file<P: AsRef<Path>>(base: &Option<PathBuf>, path: P, content: &str) -> std::io::Result<()> { | ||
34 | if path.as_ref().is_relative() { | ||
35 | let path = base.clone().unwrap_or(PathBuf::from(BASE)).join(path); | ||
36 | /* There is always *at least* the base directory as a parent */ | ||
37 | fs::create_dir_all(path.parent().unwrap())?; | ||
38 | fs::write(path, content)?; | ||
39 | } else { | ||
40 | eprintln!("Absolute paths not supported: {}", path.as_ref().to_string_lossy()) | ||
41 | } | ||
42 | Ok(()) | ||
43 | } | ||
44 | |||
/*
 * Indent block of code.
 *
 * Every non-empty line of `input` is prefixed with `indent` spaces; empty lines are left
 * empty. Lines are re-joined with '\n', so a trailing line terminator is not reproduced.
 * With `indent == 0` the input is handed back untouched.
 */
fn indent<'a>(input: Cow<'a, str>, indent: usize) -> Cow<'a, str> {
    if indent == 0 {
        return input;
    }
    let padding = " ".repeat(indent);
    let mut indented = String::with_capacity(input.len() + indent * input.lines().count());
    for (index, line) in input.lines().enumerate() {
        if index != 0 {
            indented.push('\n');
        }
        if line.is_empty() {
            continue;
        }
        indented.push_str(&padding);
        indented.push_str(line);
    }
    Cow::Owned(indented)
}
64 | |||
65 | /* | ||
66 | * Here are some notes on the following function | ||
67 | * | ||
68 | * lazy_static! { | ||
69 | * static ref MACRO: Regex = Regex::new(r"regex").unwrap(); | ||
70 | * } | ||
71 | * | ||
72 | * let mut text = Cow::from("This is some text..."); | ||
73 | * while MACRO.is_match(&text) { | ||
74 | * text = MACRO.replace_all(&text, _closure); | ||
75 | * } | ||
76 | * | ||
77 | * The problem with this version is that due to how `Cow` works, the value returned by | ||
78 | * `replace_all` cannot live more than the borrowed `text` passed as a parameter. This is | ||
79 | * because the function returns a reference to `text` (Cow::Borrowed) if no replacement takes | ||
80 | * place, so for the returned value to be valid, `text` still needs to be available. | ||
81 | * But text gets overridden right away, so, in principle, if no replacement takes place `text` | ||
82 | * gets overridden by a reference to it (losing data). | ||
83 | * | ||
84 | * Note that this doesn't happen in practice (but the compiler doesn't know about this) because | ||
85 | * the `replace_all` function is applied as long as some replacement is possible (`while` | ||
86 | * condition). In other words, all calls to `replace_all` always return an `Cow::Owned` value. | ||
87 | * | ||
88 | * This is how you would solve the problem instead: | ||
89 | * | ||
90 | * while let Cow::Owned(new_text) = MACRO.replace_all(&text, _closure) { | ||
91 | * text = Cow::from(new_text); | ||
92 | * } | ||
93 | * | ||
94 | * In this case, the matched `Cow::Owned` is not concerned by any lifetime (type is `Cow<'_,str>`) | ||
95 | * of the borrowed value `text`. Moreover `text` takes ownership of `new_text: String` using | ||
96 | * the `Cow::from()` function. No heap allocation is performed, and the string is not copied. | ||
97 | */ | ||
98 | fn build(base: &Option<PathBuf>, blocks: &Blocks, depth: u32) { | ||
99 | lazy_static! { | ||
100 | static ref PATH: Regex = Regex::new(r"^(?:[[:word:]\.-]+/)*[[:word:]\.-]+\.[[:alpha:]]+$").unwrap(); | ||
101 | static ref MACRO: Regex = Regex::new(r"(?m)^([[:blank:]]*)<<([^>\s]+)>>").unwrap(); | ||
102 | } | ||
103 | blocks.iter().for_each(|(k,v)| if PATH.is_match(k) { | ||
104 | let mut d = 0; | ||
105 | let mut code = v.clone(); // No clone is happening because the value is a `Borrowed` | ||
106 | // Here `replace_all` returns a `Owned` value only when a replacement takes place. | ||
107 | // We can use it to recursively build blocks of code until no more substitutions are | ||
108 | // necessary (i.e., `replace_all` returns a `Borrowed`). | ||
109 | while let Cow::Owned(step) = MACRO.replace_all(&code, |caps: &Captures| { | ||
110 | let block = if d < depth { | ||
111 | blocks.get(&caps[2]).expect("Block not present").clone() | ||
112 | } else { | ||
113 | eprintln!("Reached maximum depth, output might be truncated. Increase `--depth` accordingly."); | ||
114 | Cow::Owned(String::from("")) | ||
115 | }; | ||
116 | indent(block, caps[1].len()) | ||
117 | }) { | ||
118 | code = Cow::from(step); | ||
119 | d += 1; | ||
120 | } | ||
121 | write_to_file(base, k, &code).expect("Unable to write to file"); | ||
122 | }) | ||
123 | } | ||
124 | |||
125 | fn main() -> Result<()> { | ||
126 | let config = Config::parse(); | ||
127 | let mut pandoc = Pandoc::new(); | ||
128 | /* Pandoc input setup */ | ||
129 | pandoc.set_input(InputKind::Files(config.input)); | ||
130 | pandoc.set_input_format(InputFormat::Markdown, vec![]); | ||
131 | /* Pandoc output setup */ | ||
132 | pandoc.set_output(OutputKind::Pipe); | ||
133 | pandoc.set_output_format(OutputFormat::Json, vec![]); | ||
134 | /* Process literate program */ | ||
135 | pandoc.add_filter(move |json| pandoc_ast::filter(json, |pandoc| { | ||
136 | let mut blocks: Blocks = HashMap::new(); | ||
137 | pandoc.blocks.iter().for_each(|block| | ||
138 | if let Block::CodeBlock((id,classes,attrs), code) = block { | ||
139 | /* Only process blocks with an ID */ | ||
140 | if !id.is_empty() { | ||
141 | let key = { | ||
142 | if let Some(path) = attrs.iter().find(|(k,_)| k == "path") { | ||
143 | format!("{}{}", path.1, id) | ||
144 | } else { | ||
145 | id.to_string() | ||
146 | } | ||
147 | }; | ||
148 | /* Insert (or replace) block of code. In case of ID clash, the standard | ||
149 | * behaviour is to append the new code to the existing snippet. Use the class | ||
150 | * `.override` to override the previously encountered snippets. | ||
151 | */ | ||
152 | if classes.iter().any(|c| c == "override") { | ||
153 | blocks.insert(key, Cow::from(code)); | ||
154 | } else { | ||
155 | blocks.entry(key) | ||
156 | .and_modify(|s| { *s += "\n"; *s += Cow::from(code) }) | ||
157 | .or_insert(Cow::from(code)); | ||
158 | } | ||
159 | } else { | ||
160 | eprintln!("Ignoring codeblock without ID:"); | ||
161 | eprintln!("{}", indent(Cow::from(code),4)); | ||
162 | } | ||
163 | } | ||
164 | ); | ||
165 | build(&config.output, &blocks, config.depth); | ||
166 | pandoc | ||
167 | })); | ||
168 | pandoc.execute().unwrap(); | ||
169 | Ok(()) | ||
170 | } | ||