diff options
author | Federico Igne <git@federicoigne.com> | 2022-08-28 17:07:34 +0100 |
---|---|---|
committer | Federico Igne <git@federicoigne.com> | 2022-08-28 17:07:34 +0100 |
commit | 76328b1334e1429e1a2719af2d041ac5fa86dd0e (patch) | |
tree | a2f4145bd9e242a20f1f70993ef28bac24fc3e00 | |
parent | 3f0792e3fb1a17d99147bf2cb8ac8d2442d2cdb6 (diff) | |
download | pangler-76328b1334e1429e1a2719af2d041ac5fa86dd0e.tar.gz pangler-76328b1334e1429e1a2719af2d041ac5fa86dd0e.zip |
feat(source): provide tangled code alongside the literate program [v0.3.0]
On one hand this defeats the purpose of having the literate program, but
it avoids the awkward "bootstrapping" experience.
-rw-r--r-- | Cargo.toml | 11 | ||||
-rw-r--r-- | README.md | 16 | ||||
-rwxr-xr-x | bin/pangler-v0.1.0 | bin | 3588064 -> 0 bytes | |||
-rw-r--r-- | src/main.rs | 165 | ||||
-rw-r--r-- | v0.1.0/Cargo.toml | 14 | ||||
-rw-r--r-- | v0.1.0/src/main.rs | 170 |
6 files changed, 365 insertions, 11 deletions
diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..644ee68 --- /dev/null +++ b/Cargo.toml | |||
@@ -0,0 +1,11 @@ | |||
1 | [package] | ||
2 | name = "pangler" | ||
3 | version = "0.3.0" | ||
4 | edition = "2021" | ||
5 | |||
6 | [dependencies] | ||
7 | clap = { version = "3.1", features = ["derive"] } | ||
8 | pandoc = "0.8" | ||
9 | pandoc_ast = "0.8" | ||
10 | lazy_static = "1.4" | ||
11 | regex = "1.5" \ No newline at end of file | ||
@@ -124,21 +124,15 @@ if clss.iter().any(|c| c == "override") { | |||
124 | 124 | ||
125 | ## Tangling: generating the source files | 125 | ## Tangling: generating the source files |
126 | 126 | ||
127 | To bootstrap the tangling process, an early version of `pangler` is provided under `bin/` in this repository. | 127 | To bootstrap the tangling process, a tangled version of the program is provided alongside the literate version. |
128 | 128 | ||
129 | You can generate the code for the current version of the program, in the current working directory, with | 129 | The executable can be compiled from the root of the project with |
130 | |||
131 | ```sh | ||
132 | ./bin/pangler-v0.1.0 README.md | ||
133 | ``` | ||
134 | |||
135 | and compile it with | ||
136 | 130 | ||
137 | ```sh | 131 | ```sh |
138 | cargo build --release | 132 | cargo build --release |
139 | ``` | 133 | ``` |
140 | 134 | ||
141 | From now on you can make changes to the `README.md` file and use the latest version of `pangler` to tangle and compile it. | 135 | From now on you can make changes to the `README.md` file and use your latest compiled version of `pangler` to tangle and compile it. |
142 | 136 | ||
143 | ## Weaving: generating the documentation | 137 | ## Weaving: generating the documentation |
144 | 138 | ||
@@ -565,12 +559,12 @@ fn write_to_file( | |||
565 | 559 | ||
566 | # Credits | 560 | # Credits |
567 | 561 | ||
568 | `pangler v0.2.0` was created by Federico Igne (git@federicoigne.com) and available at [`https://git.dyamon.me/projects/pangler`](https://git.dyamon.me/projects/pangler). | 562 | `pangler` was created by Federico Igne (git@federicoigne.com) and available at [`https://git.dyamon.me/projects/pangler`](https://git.dyamon.me/projects/pangler). |
569 | 563 | ||
570 | ```{#Cargo.toml .toml} | 564 | ```{#Cargo.toml .toml} |
571 | [package] | 565 | [package] |
572 | name = "pangler" | 566 | name = "pangler" |
573 | version = "0.2.0" | 567 | version = "0.3.0" |
574 | edition = "2021" | 568 | edition = "2021" |
575 | 569 | ||
576 | [dependencies] | 570 | [dependencies] |
diff --git a/bin/pangler-v0.1.0 b/bin/pangler-v0.1.0 deleted file mode 100755 index fd07616..0000000 --- a/bin/pangler-v0.1.0 +++ /dev/null | |||
Binary files differ | |||
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..02fbcd4 --- /dev/null +++ b/src/main.rs | |||
@@ -0,0 +1,165 @@ | |||
1 | use clap::Parser; | ||
2 | use pandoc::{ | ||
3 | InputFormat,InputKind,OutputFormat,OutputKind,Pandoc | ||
4 | }; | ||
5 | use pandoc_ast::Block; | ||
6 | use std::borrow::Cow; | ||
7 | use std::collections::HashMap; | ||
8 | use lazy_static::lazy_static; | ||
9 | use regex::{Captures,Regex}; | ||
10 | use std::fs; | ||
11 | use std::io::Result; | ||
12 | use std::path::PathBuf; | ||
13 | |||
14 | const BASE: &str = "./"; | ||
15 | |||
// Command-line configuration, parsed in `main` via `Config::parse()`.
// NOTE(review): the `///` comments below are picked up by clap's derive as
// user-facing help text, so treat them as runtime strings when editing.
/// A tangler for Literate Programming in Pandoc
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct Config {
    // Forwarded to `build` as `max_depth`: the number of macro-expansion
    // passes allowed per output file before remaining macros are dropped.
    /// Maximum substitution depth
    #[clap(short, long, default_value_t = 10)]
    depth: u32,
    // `None` makes `build` fall back to the `BASE` constant.
    /// Base output directory [default: './']
    #[clap(short, long)]
    output: Option<PathBuf>,
    // Positional arguments; handed to pandoc as `InputKind::Files`.
    /// Input files
    input: Vec<PathBuf>,
}
29 | |||
30 | type Blocks<'a> = HashMap<String,Cow<'a,str>>; | ||
31 | |||
32 | fn build( | ||
33 | base: &Option<PathBuf>, | ||
34 | blocks: &Blocks, | ||
35 | max_depth: u32 | ||
36 | ) { | ||
37 | lazy_static! { | ||
38 | static ref PATH: Regex = | ||
39 | Regex::new( | ||
40 | r"^(?:[[:word:]\.-]+/)*[[:word:]\.-]+\.[[:alpha:]]+$" | ||
41 | ).unwrap(); | ||
42 | static ref MACRO: Regex = | ||
43 | Regex::new( | ||
44 | r"(?m)^([[:blank:]]*)<<([^>\s]+)>>" | ||
45 | ).unwrap(); | ||
46 | } | ||
47 | blocks | ||
48 | .iter() | ||
49 | .for_each(|(path,code)| if PATH.is_match(path) { | ||
50 | let mut current_depth = 0; | ||
51 | let mut code = code.clone(); | ||
52 | while let Cow::Owned(new_code) = MACRO.replace_all( | ||
53 | &code, | ||
54 | |caps: &Captures| { | ||
55 | if current_depth < max_depth { | ||
56 | let block = blocks | ||
57 | .get(&caps[2]) | ||
58 | .expect("Block not present") | ||
59 | .clone(); | ||
60 | indent(block, caps[1].len()) | ||
61 | } else { | ||
62 | eprintln!("Reached maximum depth, \ | ||
63 | output might be truncated.\n\ | ||
64 | Increase `--depth` accordingly."); | ||
65 | Cow::Owned(String::from("")) | ||
66 | } | ||
67 | } | ||
68 | ) { | ||
69 | code = Cow::from(new_code); | ||
70 | current_depth += 1; | ||
71 | } | ||
72 | let file = base | ||
73 | .clone() | ||
74 | .unwrap_or(PathBuf::from(BASE)) | ||
75 | .join(path); | ||
76 | write_to_file(file, &code) | ||
77 | .expect("Unable to write to file"); | ||
78 | }) | ||
79 | } | ||
80 | |||
/// Prefixes every non-empty line of `input` with `indent` spaces.
///
/// With `indent == 0` the input is handed back untouched (still borrowed if
/// it was borrowed). Otherwise a new string is built: lines are re-joined
/// with `\n`, empty lines stay empty, and a trailing line terminator is not
/// reproduced because `str::lines` does not report it.
fn indent<'a>(
    input: Cow<'a,str>,
    indent: usize
) -> Cow<'a,str> {
    if indent == 0 {
        return input;
    }
    let pad = " ".repeat(indent);
    // Upper bound: original bytes plus one pad per line.
    let mut out = String::with_capacity(
        input.len() + indent * input.lines().count()
    );
    let mut first = true;
    for line in input.lines() {
        if first {
            first = false;
        } else {
            out.push('\n');
        }
        if line.is_empty() {
            continue;
        }
        out += &pad;
        out += line;
    }
    Cow::Owned(out)
}
103 | |||
/// Writes `content` to `path`, creating any missing parent directories.
///
/// `path` is the final location, i.e. the output directory already joined
/// with the block name by `build`. It may be absolute: the block name itself
/// is constrained by `build`'s `PATH` regex, which cannot match a leading
/// `/`, so an absolute `path` here can only come from an absolute output
/// directory chosen by the user. Rejecting it (as this function used to)
/// silently produced no file while still reporting success.
///
/// # Errors
///
/// Returns the underlying I/O error when a directory cannot be created or
/// the file cannot be written.
fn write_to_file(
    path: PathBuf, content: &str
) -> std::io::Result<()> {
    // `parent()` is `None` for an empty path or a filesystem root; in that
    // case there is nothing to create and `fs::write` reports the problem.
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent)?;
    }
    fs::write(path, content)
}
118 | |||
119 | |||
120 | fn main() -> Result<()> { | ||
121 | let config = Config::parse(); | ||
122 | let mut pandoc = Pandoc::new(); | ||
123 | pandoc.set_input(InputKind::Files(config.input)); | ||
124 | pandoc.set_input_format(InputFormat::Markdown, vec![]); | ||
125 | pandoc.set_output(OutputKind::Pipe); | ||
126 | pandoc.set_output_format(OutputFormat::Json, vec![]); | ||
127 | pandoc.add_filter( | ||
128 | move |json| pandoc_ast::filter(json, | ||
129 | |pandoc| { | ||
130 | let mut blocks: Blocks = HashMap::new(); | ||
131 | pandoc.blocks.iter().for_each(|block| | ||
132 | if let Block::CodeBlock((id,clss,attrs), code) = block { | ||
133 | if !id.is_empty() { | ||
134 | let key = { | ||
135 | let path = attrs.iter().find(|(k,_)| k == "path"); | ||
136 | if let Some(path) = path { | ||
137 | format!("{}{}", path.1, id) | ||
138 | } else { | ||
139 | id.to_string() | ||
140 | } | ||
141 | }; | ||
142 | if clss.iter().any(|c| c == "override") { | ||
143 | blocks.insert(key, Cow::from(code)); | ||
144 | } else { | ||
145 | blocks.entry(key) | ||
146 | .and_modify(|s| { | ||
147 | *s += "\n"; | ||
148 | *s += Cow::from(code) | ||
149 | }) | ||
150 | .or_insert(Cow::from(code)); | ||
151 | } | ||
152 | } else { | ||
153 | eprintln!("Ignoring code block without ID:"); | ||
154 | eprintln!("{}", indent(Cow::from(code),4)); | ||
155 | } | ||
156 | } | ||
157 | ); | ||
158 | build(&config.output, &blocks, config.depth); | ||
159 | pandoc | ||
160 | } | ||
161 | ) | ||
162 | ); | ||
163 | pandoc.execute().unwrap(); | ||
164 | Ok(()) | ||
165 | } \ No newline at end of file | ||
diff --git a/v0.1.0/Cargo.toml b/v0.1.0/Cargo.toml new file mode 100644 index 0000000..50660af --- /dev/null +++ b/v0.1.0/Cargo.toml | |||
@@ -0,0 +1,14 @@ | |||
1 | [package] | ||
2 | name = "pangler" | ||
3 | version = "0.1.0" | ||
4 | edition = "2021" | ||
5 | |||
6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
7 | |||
8 | [dependencies] | ||
9 | lazy_static = "1.4" | ||
10 | regex = "1.5" | ||
11 | pandoc = "0.8" | ||
12 | pandoc_ast = "0.8" | ||
13 | clap = { version = "3.1", features = ["derive"] } | ||
14 | |||
diff --git a/v0.1.0/src/main.rs b/v0.1.0/src/main.rs new file mode 100644 index 0000000..ea4e4f7 --- /dev/null +++ b/v0.1.0/src/main.rs | |||
@@ -0,0 +1,170 @@ | |||
1 | use clap::Parser; | ||
2 | use lazy_static::lazy_static; | ||
3 | use pandoc::{InputFormat,InputKind,OutputFormat,OutputKind,Pandoc}; | ||
4 | use pandoc_ast::Block; | ||
5 | use regex::{Captures,Regex}; | ||
6 | use std::borrow::Cow; | ||
7 | use std::collections::HashMap; | ||
8 | use std::fs; | ||
9 | use std::io::Result; | ||
10 | use std::path::{Path,PathBuf}; | ||
11 | |||
12 | const BASE: &str = "code"; | ||
13 | |||
14 | type Blocks<'a> = HashMap<String,Cow<'a,str>>; | ||
15 | |||
/* Command-line configuration, parsed in `main` via `Config::parse()`.
 * NOTE(review): the `///` comments below are picked up by clap's derive as
 * user-facing help text, so treat them as runtime strings when editing.
 */
/// A tangler for Literate Programming in Pandoc
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct Config {
    /* Forwarded to `build`: number of macro-expansion passes allowed per file */
    /// Maximum substitution depth
    #[clap(short, long, default_value_t = 10)]
    depth: u32,

    /* `None` makes `write_to_file` fall back to the `BASE` constant */
    /// Base output directory [default: './code']
    #[clap(short, long)]
    output: Option<PathBuf>,

    /* Positional arguments; handed to pandoc as `InputKind::Files` */
    /// Input files
    input: Vec<PathBuf>,
}
31 | |||
32 | /* Write code to target file */ | ||
33 | fn write_to_file<P: AsRef<Path>>(base: &Option<PathBuf>, path: P, content: &str) -> std::io::Result<()> { | ||
34 | if path.as_ref().is_relative() { | ||
35 | let path = base.clone().unwrap_or(PathBuf::from(BASE)).join(path); | ||
36 | /* There is always *at least* the base directory as a parent */ | ||
37 | fs::create_dir_all(path.parent().unwrap())?; | ||
38 | fs::write(path, content)?; | ||
39 | } else { | ||
40 | eprintln!("Absolute paths not supported: {}", path.as_ref().to_string_lossy()) | ||
41 | } | ||
42 | Ok(()) | ||
43 | } | ||
44 | |||
/* Prefix every non-empty line of `input` with `indent` spaces.
 *
 * A zero `indent` returns the input as is. Otherwise the lines are re-joined with `\n`; empty
 * lines stay empty and a trailing line terminator is not reproduced (`str::lines` hides it).
 */
fn indent<'a>(input: Cow<'a,str>, indent: usize) -> Cow<'a,str> {
    if indent == 0 {
        return input;
    }
    let pad = " ".repeat(indent);
    let mut out = String::with_capacity(input.len() + indent*input.lines().count());
    let mut lines = input.lines();
    /* The first line takes no separator; every following one is preceded by '\n' */
    if let Some(head) = lines.next() {
        if !head.is_empty() {
            out.push_str(&pad);
            out.push_str(head);
        }
    }
    for line in lines {
        out.push('\n');
        if !line.is_empty() {
            out.push_str(&pad);
            out.push_str(line);
        }
    }
    Cow::Owned(out)
}
64 | |||
65 | /* | ||
66 | * Here are some notes on the following function | ||
67 | * | ||
68 | * lazy_static! { | ||
69 | * static ref MACRO: Regex = Regex::new(r"regex").unwrap(); | ||
70 | * } | ||
71 | * | ||
72 | * let mut text = Cow::from("This is some text..."); | ||
73 | * while MACRO.is_match(&text) { | ||
74 | * text = MACRO.replace_all(&text, _closure); | ||
75 | * } | ||
76 | * | ||
77 | * The problem with this version is that due to how `Cow` works, the value returned by | ||
78 | * `replace_all` cannot live more than the borrowed `text` passed as a parameter. This is | ||
79 | * because the function returns a reference to `text` (Cow::Borrowed) if no replacement takes | ||
80 | * place, so for the returned value to be valid, `text` still needs to be available. | ||
81 | * But text gets overridden right away, so, in principle, if no replacement takes place `text` | ||
82 | * gets overridden by a reference to it (losing data). | ||
83 | * | ||
84 | * Note that this doesn't happen in practice (but the compiler doesn't know about this) because | ||
85 | * the `replace_all` function is applied as long as some replacement is possible (`while` | ||
86 | * condition). In other words, all calls to `replace_all` always return an `Cow::Owned` value. | ||
87 | * | ||
88 | * This is how you would solve the problem instead: | ||
89 | * | ||
90 | * while let Cow::Owned(new_text) = MACRO.replace_all(&text, _closure) { | ||
91 | * text = Cow::from(new_text); | ||
92 | * } | ||
93 | * | ||
94 | * In this case, the matched `Cow::Owned` is not concerned by any lifetime (type is `Cow<'_,str>`) | ||
95 | * of the borrowed value `text`. Moreover `text` takes ownership of `new_text: String` using | ||
96 | * the `Cow::from()` function. No heap allocation is performed, and the string is not copied. | ||
97 | */ | ||
98 | fn build(base: &Option<PathBuf>, blocks: &Blocks, depth: u32) { | ||
99 | lazy_static! { | ||
100 | static ref PATH: Regex = Regex::new(r"^(?:[[:word:]\.-]+/)*[[:word:]\.-]+\.[[:alpha:]]+$").unwrap(); | ||
101 | static ref MACRO: Regex = Regex::new(r"(?m)^([[:blank:]]*)<<([^>\s]+)>>").unwrap(); | ||
102 | } | ||
103 | blocks.iter().for_each(|(k,v)| if PATH.is_match(k) { | ||
104 | let mut d = 0; | ||
105 | let mut code = v.clone(); // No clone is happening because the value is a `Borrowed` | ||
106 | // Here `replace_all` returns a `Owned` value only when a replacement takes place. | ||
107 | // We can use it to recursively build blocks of code until no more substitutions are | ||
108 | // necessary (i.e., `replace_all` returns a `Borrowed`). | ||
109 | while let Cow::Owned(step) = MACRO.replace_all(&code, |caps: &Captures| { | ||
110 | let block = if d < depth { | ||
111 | blocks.get(&caps[2]).expect("Block not present").clone() | ||
112 | } else { | ||
113 | eprintln!("Reached maximum depth, output might be truncated. Increase `--depth` accordingly."); | ||
114 | Cow::Owned(String::from("")) | ||
115 | }; | ||
116 | indent(block, caps[1].len()) | ||
117 | }) { | ||
118 | code = Cow::from(step); | ||
119 | d += 1; | ||
120 | } | ||
121 | write_to_file(base, k, &code).expect("Unable to write to file"); | ||
122 | }) | ||
123 | } | ||
124 | |||
125 | fn main() -> Result<()> { | ||
126 | let config = Config::parse(); | ||
127 | let mut pandoc = Pandoc::new(); | ||
128 | /* Pandoc input setup */ | ||
129 | pandoc.set_input(InputKind::Files(config.input)); | ||
130 | pandoc.set_input_format(InputFormat::Markdown, vec![]); | ||
131 | /* Pandoc output setup */ | ||
132 | pandoc.set_output(OutputKind::Pipe); | ||
133 | pandoc.set_output_format(OutputFormat::Json, vec![]); | ||
134 | /* Process literate program */ | ||
135 | pandoc.add_filter(move |json| pandoc_ast::filter(json, |pandoc| { | ||
136 | let mut blocks: Blocks = HashMap::new(); | ||
137 | pandoc.blocks.iter().for_each(|block| | ||
138 | if let Block::CodeBlock((id,classes,attrs), code) = block { | ||
139 | /* Only process blocks with an ID */ | ||
140 | if !id.is_empty() { | ||
141 | let key = { | ||
142 | if let Some(path) = attrs.iter().find(|(k,_)| k == "path") { | ||
143 | format!("{}{}", path.1, id) | ||
144 | } else { | ||
145 | id.to_string() | ||
146 | } | ||
147 | }; | ||
148 | /* Insert (or replace) block of code. In case of ID clash, the standard | ||
149 | * behaviour is to append the new code to the existing snippet. Use the class | ||
150 | * `.override` to override the previously encountered snippets. | ||
151 | */ | ||
152 | if classes.iter().any(|c| c == "override") { | ||
153 | blocks.insert(key, Cow::from(code)); | ||
154 | } else { | ||
155 | blocks.entry(key) | ||
156 | .and_modify(|s| { *s += "\n"; *s += Cow::from(code) }) | ||
157 | .or_insert(Cow::from(code)); | ||
158 | } | ||
159 | } else { | ||
160 | eprintln!("Ignoring codeblock without ID:"); | ||
161 | eprintln!("{}", indent(Cow::from(code),4)); | ||
162 | } | ||
163 | } | ||
164 | ); | ||
165 | build(&config.output, &blocks, config.depth); | ||
166 | pandoc | ||
167 | })); | ||
168 | pandoc.execute().unwrap(); | ||
169 | Ok(()) | ||
170 | } | ||