diff options
| author | Federico Igne <git@federicoigne.com> | 2022-08-28 17:07:34 +0100 |
|---|---|---|
| committer | Federico Igne <git@federicoigne.com> | 2022-08-28 17:07:34 +0100 |
| commit | 76328b1334e1429e1a2719af2d041ac5fa86dd0e (patch) | |
| tree | a2f4145bd9e242a20f1f70993ef28bac24fc3e00 | |
| parent | 3f0792e3fb1a17d99147bf2cb8ac8d2442d2cdb6 (diff) | |
| download | pangler-76328b1334e1429e1a2719af2d041ac5fa86dd0e.tar.gz pangler-76328b1334e1429e1a2719af2d041ac5fa86dd0e.zip | |
feat(source): provide tangled code alongside the literate program [v0.3.0]
On the one hand this defeats the purpose of having the literate program; on the
other, it avoids the awkward "bootstrapping" experience.
| -rw-r--r-- | Cargo.toml | 11 | ||||
| -rw-r--r-- | README.md | 16 | ||||
| -rwxr-xr-x | bin/pangler-v0.1.0 | bin | 3588064 -> 0 bytes | |||
| -rw-r--r-- | src/main.rs | 165 | ||||
| -rw-r--r-- | v0.1.0/Cargo.toml | 14 | ||||
| -rw-r--r-- | v0.1.0/src/main.rs | 170 |
6 files changed, 365 insertions, 11 deletions
diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..644ee68 --- /dev/null +++ b/Cargo.toml | |||
| @@ -0,0 +1,11 @@ | |||
| 1 | [package] | ||
| 2 | name = "pangler" | ||
| 3 | version = "0.3.0" | ||
| 4 | edition = "2021" | ||
| 5 | |||
| 6 | [dependencies] | ||
| 7 | clap = { version = "3.1", features = ["derive"] } | ||
| 8 | pandoc = "0.8" | ||
| 9 | pandoc_ast = "0.8" | ||
| 10 | lazy_static = "1.4" | ||
| 11 | regex = "1.5" \ No newline at end of file | ||
| @@ -124,21 +124,15 @@ if clss.iter().any(|c| c == "override") { | |||
| 124 | 124 | ||
| 125 | ## Tangling: generating the source files | 125 | ## Tangling: generating the source files |
| 126 | 126 | ||
| 127 | To bootstrap the tangling process, an early version of `pangler` is provided under `bin/` in this repository. | 127 | To bootstrap the tangling process, a tangled version of the program is provided alongside the literate version. |
| 128 | 128 | ||
| 129 | You can generate the code for the current version of the program, in the current working directory, with | 129 | The executable can be compiled from the root of the project with |
| 130 | |||
| 131 | ```sh | ||
| 132 | ./bin/pangler-v0.1.0 README.md | ||
| 133 | ``` | ||
| 134 | |||
| 135 | and compile it with | ||
| 136 | 130 | ||
| 137 | ```sh | 131 | ```sh |
| 138 | cargo build --release | 132 | cargo build --release |
| 139 | ``` | 133 | ``` |
| 140 | 134 | ||
| 141 | From now on you can make changes to the `README.md` file and use the latest version of `pangler` to tangle and compile it. | 135 | From now on you can make changes to the `README.md` file and use your latest compiled version of `pangler` to tangle and compile it. |
| 142 | 136 | ||
| 143 | ## Weaving: generating the documentation | 137 | ## Weaving: generating the documentation |
| 144 | 138 | ||
| @@ -565,12 +559,12 @@ fn write_to_file( | |||
| 565 | 559 | ||
| 566 | # Credits | 560 | # Credits |
| 567 | 561 | ||
| 568 | `pangler v0.2.0` was created by Federico Igne (git@federicoigne.com) and available at [`https://git.dyamon.me/projects/pangler`](https://git.dyamon.me/projects/pangler). | 562 | `pangler` was created by Federico Igne (git@federicoigne.com) and available at [`https://git.dyamon.me/projects/pangler`](https://git.dyamon.me/projects/pangler). |
| 569 | 563 | ||
| 570 | ```{#Cargo.toml .toml} | 564 | ```{#Cargo.toml .toml} |
| 571 | [package] | 565 | [package] |
| 572 | name = "pangler" | 566 | name = "pangler" |
| 573 | version = "0.2.0" | 567 | version = "0.3.0" |
| 574 | edition = "2021" | 568 | edition = "2021" |
| 575 | 569 | ||
| 576 | [dependencies] | 570 | [dependencies] |
diff --git a/bin/pangler-v0.1.0 b/bin/pangler-v0.1.0 deleted file mode 100755 index fd07616..0000000 --- a/bin/pangler-v0.1.0 +++ /dev/null | |||
| Binary files differ | |||
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..02fbcd4 --- /dev/null +++ b/src/main.rs | |||
| @@ -0,0 +1,165 @@ | |||
| 1 | use clap::Parser; | ||
| 2 | use pandoc::{ | ||
| 3 | InputFormat,InputKind,OutputFormat,OutputKind,Pandoc | ||
| 4 | }; | ||
| 5 | use pandoc_ast::Block; | ||
| 6 | use std::borrow::Cow; | ||
| 7 | use std::collections::HashMap; | ||
| 8 | use lazy_static::lazy_static; | ||
| 9 | use regex::{Captures,Regex}; | ||
| 10 | use std::fs; | ||
| 11 | use std::io::Result; | ||
| 12 | use std::path::PathBuf; | ||
| 13 | |||
| 14 | const BASE: &str = "./"; | ||
| 15 | |||
| 16 | /// A tangler for Literate Programming in Pandoc | ||
| 17 | #[derive(Parser, Debug)] | ||
| 18 | #[clap(author, version, about, long_about = None)] | ||
| 19 | struct Config { | ||
| 20 | /// Maximum substitution depth | ||
| 21 | #[clap(short, long, default_value_t = 10)] | ||
| 22 | depth: u32, | ||
| 23 | /// Base output directory [default: './'] | ||
| 24 | #[clap(short, long)] | ||
| 25 | output: Option<PathBuf>, | ||
| 26 | /// Input files | ||
| 27 | input: Vec<PathBuf>, | ||
| 28 | } | ||
| 29 | |||
| 30 | type Blocks<'a> = HashMap<String,Cow<'a,str>>; | ||
| 31 | |||
| 32 | fn build( | ||
| 33 | base: &Option<PathBuf>, | ||
| 34 | blocks: &Blocks, | ||
| 35 | max_depth: u32 | ||
| 36 | ) { | ||
| 37 | lazy_static! { | ||
| 38 | static ref PATH: Regex = | ||
| 39 | Regex::new( | ||
| 40 | r"^(?:[[:word:]\.-]+/)*[[:word:]\.-]+\.[[:alpha:]]+$" | ||
| 41 | ).unwrap(); | ||
| 42 | static ref MACRO: Regex = | ||
| 43 | Regex::new( | ||
| 44 | r"(?m)^([[:blank:]]*)<<([^>\s]+)>>" | ||
| 45 | ).unwrap(); | ||
| 46 | } | ||
| 47 | blocks | ||
| 48 | .iter() | ||
| 49 | .for_each(|(path,code)| if PATH.is_match(path) { | ||
| 50 | let mut current_depth = 0; | ||
| 51 | let mut code = code.clone(); | ||
| 52 | while let Cow::Owned(new_code) = MACRO.replace_all( | ||
| 53 | &code, | ||
| 54 | |caps: &Captures| { | ||
| 55 | if current_depth < max_depth { | ||
| 56 | let block = blocks | ||
| 57 | .get(&caps[2]) | ||
| 58 | .expect("Block not present") | ||
| 59 | .clone(); | ||
| 60 | indent(block, caps[1].len()) | ||
| 61 | } else { | ||
| 62 | eprintln!("Reached maximum depth, \ | ||
| 63 | output might be truncated.\n\ | ||
| 64 | Increase `--depth` accordingly."); | ||
| 65 | Cow::Owned(String::from("")) | ||
| 66 | } | ||
| 67 | } | ||
| 68 | ) { | ||
| 69 | code = Cow::from(new_code); | ||
| 70 | current_depth += 1; | ||
| 71 | } | ||
| 72 | let file = base | ||
| 73 | .clone() | ||
| 74 | .unwrap_or(PathBuf::from(BASE)) | ||
| 75 | .join(path); | ||
| 76 | write_to_file(file, &code) | ||
| 77 | .expect("Unable to write to file"); | ||
| 78 | }) | ||
| 79 | } | ||
| 80 | |||
| 81 | fn indent<'a>( | ||
| 82 | input: Cow<'a,str>, | ||
| 83 | indent: usize | ||
| 84 | ) -> Cow<'a,str> { | ||
| 85 | if indent > 0 { | ||
| 86 | let prefix = format!("{:indent$}", ""); | ||
| 87 | let size = input.len() + indent*input.lines().count(); | ||
| 88 | let mut output = String::with_capacity(size); | ||
| 89 | input.lines().enumerate().for_each(|(i,line)| { | ||
| 90 | if i > 0 { | ||
| 91 | output.push('\n'); | ||
| 92 | } | ||
| 93 | if !line.is_empty() { | ||
| 94 | output.push_str(&prefix); | ||
| 95 | output.push_str(line); | ||
| 96 | } | ||
| 97 | }); | ||
| 98 | Cow::Owned(output) | ||
| 99 | } else { | ||
| 100 | input | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | fn write_to_file( | ||
| 105 | path: PathBuf, content: &str | ||
| 106 | ) -> std::io::Result<()> { | ||
| 107 | if path.is_relative() { | ||
| 108 | fs::create_dir_all(path.parent().unwrap())?; | ||
| 109 | fs::write(path, content)?; | ||
| 110 | } else { | ||
| 111 | eprintln!( | ||
| 112 | "Absolute paths not supported: {}", | ||
| 113 | path.to_string_lossy() | ||
| 114 | ) | ||
| 115 | } | ||
| 116 | Ok(()) | ||
| 117 | } | ||
| 118 | |||
| 119 | |||
| 120 | fn main() -> Result<()> { | ||
| 121 | let config = Config::parse(); | ||
| 122 | let mut pandoc = Pandoc::new(); | ||
| 123 | pandoc.set_input(InputKind::Files(config.input)); | ||
| 124 | pandoc.set_input_format(InputFormat::Markdown, vec![]); | ||
| 125 | pandoc.set_output(OutputKind::Pipe); | ||
| 126 | pandoc.set_output_format(OutputFormat::Json, vec![]); | ||
| 127 | pandoc.add_filter( | ||
| 128 | move |json| pandoc_ast::filter(json, | ||
| 129 | |pandoc| { | ||
| 130 | let mut blocks: Blocks = HashMap::new(); | ||
| 131 | pandoc.blocks.iter().for_each(|block| | ||
| 132 | if let Block::CodeBlock((id,clss,attrs), code) = block { | ||
| 133 | if !id.is_empty() { | ||
| 134 | let key = { | ||
| 135 | let path = attrs.iter().find(|(k,_)| k == "path"); | ||
| 136 | if let Some(path) = path { | ||
| 137 | format!("{}{}", path.1, id) | ||
| 138 | } else { | ||
| 139 | id.to_string() | ||
| 140 | } | ||
| 141 | }; | ||
| 142 | if clss.iter().any(|c| c == "override") { | ||
| 143 | blocks.insert(key, Cow::from(code)); | ||
| 144 | } else { | ||
| 145 | blocks.entry(key) | ||
| 146 | .and_modify(|s| { | ||
| 147 | *s += "\n"; | ||
| 148 | *s += Cow::from(code) | ||
| 149 | }) | ||
| 150 | .or_insert(Cow::from(code)); | ||
| 151 | } | ||
| 152 | } else { | ||
| 153 | eprintln!("Ignoring code block without ID:"); | ||
| 154 | eprintln!("{}", indent(Cow::from(code),4)); | ||
| 155 | } | ||
| 156 | } | ||
| 157 | ); | ||
| 158 | build(&config.output, &blocks, config.depth); | ||
| 159 | pandoc | ||
| 160 | } | ||
| 161 | ) | ||
| 162 | ); | ||
| 163 | pandoc.execute().unwrap(); | ||
| 164 | Ok(()) | ||
| 165 | } \ No newline at end of file | ||
diff --git a/v0.1.0/Cargo.toml b/v0.1.0/Cargo.toml new file mode 100644 index 0000000..50660af --- /dev/null +++ b/v0.1.0/Cargo.toml | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | [package] | ||
| 2 | name = "pangler" | ||
| 3 | version = "0.1.0" | ||
| 4 | edition = "2021" | ||
| 5 | |||
| 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
| 7 | |||
| 8 | [dependencies] | ||
| 9 | lazy_static = "1.4" | ||
| 10 | regex = "1.5" | ||
| 11 | pandoc = "0.8" | ||
| 12 | pandoc_ast = "0.8" | ||
| 13 | clap = { version = "3.1", features = ["derive"] } | ||
| 14 | |||
diff --git a/v0.1.0/src/main.rs b/v0.1.0/src/main.rs new file mode 100644 index 0000000..ea4e4f7 --- /dev/null +++ b/v0.1.0/src/main.rs | |||
| @@ -0,0 +1,170 @@ | |||
| 1 | use clap::Parser; | ||
| 2 | use lazy_static::lazy_static; | ||
| 3 | use pandoc::{InputFormat,InputKind,OutputFormat,OutputKind,Pandoc}; | ||
| 4 | use pandoc_ast::Block; | ||
| 5 | use regex::{Captures,Regex}; | ||
| 6 | use std::borrow::Cow; | ||
| 7 | use std::collections::HashMap; | ||
| 8 | use std::fs; | ||
| 9 | use std::io::Result; | ||
| 10 | use std::path::{Path,PathBuf}; | ||
| 11 | |||
| 12 | const BASE: &str = "code"; | ||
| 13 | |||
| 14 | type Blocks<'a> = HashMap<String,Cow<'a,str>>; | ||
| 15 | |||
| 16 | /// A tangler for Literate Programming in Pandoc | ||
| 17 | #[derive(Parser, Debug)] | ||
| 18 | #[clap(author, version, about, long_about = None)] | ||
| 19 | struct Config { | ||
| 20 | /// Maximum substitution depth | ||
| 21 | #[clap(short, long, default_value_t = 10)] | ||
| 22 | depth: u32, | ||
| 23 | |||
| 24 | /// Base output directory [default: './code'] | ||
| 25 | #[clap(short, long)] | ||
| 26 | output: Option<PathBuf>, | ||
| 27 | |||
| 28 | /// Input files | ||
| 29 | input: Vec<PathBuf>, | ||
| 30 | } | ||
| 31 | |||
| 32 | /* Write code to target file */ | ||
| 33 | fn write_to_file<P: AsRef<Path>>(base: &Option<PathBuf>, path: P, content: &str) -> std::io::Result<()> { | ||
| 34 | if path.as_ref().is_relative() { | ||
| 35 | let path = base.clone().unwrap_or(PathBuf::from(BASE)).join(path); | ||
| 36 | /* There is always *at least* the base directory as a parent */ | ||
| 37 | fs::create_dir_all(path.parent().unwrap())?; | ||
| 38 | fs::write(path, content)?; | ||
| 39 | } else { | ||
| 40 | eprintln!("Absolute paths not supported: {}", path.as_ref().to_string_lossy()) | ||
| 41 | } | ||
| 42 | Ok(()) | ||
| 43 | } | ||
| 44 | |||
| 45 | /* Indent block of code */ | ||
| 46 | fn indent<'a>(input: Cow<'a,str>, indent: usize) -> Cow<'a,str> { | ||
| 47 | if indent > 0 { | ||
| 48 | let prefix = format!("{:indent$}", ""); | ||
| 49 | let mut output = String::with_capacity(input.len() + indent*input.lines().count()); | ||
| 50 | input.lines().enumerate().for_each(|(i,line)| { | ||
| 51 | if i > 0 { | ||
| 52 | output.push('\n'); | ||
| 53 | } | ||
| 54 | if !line.is_empty() { | ||
| 55 | output.push_str(&prefix); | ||
| 56 | output.push_str(line); | ||
| 57 | } | ||
| 58 | }); | ||
| 59 | Cow::Owned(output) | ||
| 60 | } else { | ||
| 61 | input | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | /* | ||
| 66 | * Here are some notes on the following function | ||
| 67 | * | ||
| 68 | * lazy_static! { | ||
| 69 | * static ref MACRO: Regex = Regex::new(r"regex").unwrap(); | ||
| 70 | * } | ||
| 71 | * | ||
| 72 | * let mut text = Cow::from("This is some text..."); | ||
| 73 | * while MACRO.is_match(&text) { | ||
| 74 | * text = MACRO.replace_all(&text, _closure); | ||
| 75 | * } | ||
| 76 | * | ||
| 77 | * The problem with this version is that due to how `Cow` works, the value returned by | ||
| 78 | * `replace_all` cannot live more than the borrowed `text` passed as a parameter. This is | ||
| 79 | * because the function returns a reference to `text` (Cow::Borrowed) if no replacement takes | ||
| 80 | * place, so for the returned value to be valid, `text` still needs to be available. | ||
| 81 | * But text gets overridden right away, so, in principle, if no replacement takes place `text` | ||
| 82 | * gets overridden by a reference to it (losing data). | ||
| 83 | * | ||
| 84 | * Note that this doesn't happen in practice (but the compiler doesn't know about this) because | ||
| 85 | * the `replace_all` function is applied as long as some replacement is possible (`while` | ||
| 86 | * condition). In other words, all calls to `replace_all` always return an `Cow::Owned` value. | ||
| 87 | * | ||
| 88 | * This is how you would solve the problem instead: | ||
| 89 | * | ||
| 90 | * while let Cow::Owned(new_text) = MACRO.replace_all(&text, _closure) { | ||
| 91 | * text = Cow::from(new_text); | ||
| 92 | * } | ||
| 93 | * | ||
| 94 | * In this case, the matched `Cow::Owned` is not concerned by any lifetime (type is `Cow<'_,str>`) | ||
| 95 | * of the borrowed value `text`. Moreover `text` takes ownership of `new_text: String` using | ||
| 96 | * the `Cow::from()` function. No heap allocation is performed, and the string is not copied. | ||
| 97 | */ | ||
| 98 | fn build(base: &Option<PathBuf>, blocks: &Blocks, depth: u32) { | ||
| 99 | lazy_static! { | ||
| 100 | static ref PATH: Regex = Regex::new(r"^(?:[[:word:]\.-]+/)*[[:word:]\.-]+\.[[:alpha:]]+$").unwrap(); | ||
| 101 | static ref MACRO: Regex = Regex::new(r"(?m)^([[:blank:]]*)<<([^>\s]+)>>").unwrap(); | ||
| 102 | } | ||
| 103 | blocks.iter().for_each(|(k,v)| if PATH.is_match(k) { | ||
| 104 | let mut d = 0; | ||
| 105 | let mut code = v.clone(); // No clone is happening because the value is a `Borrowed` | ||
| 106 | // Here `replace_all` returns a `Owned` value only when a replacement takes place. | ||
| 107 | // We can use it to recursively build blocks of code until no more substitutions are | ||
| 108 | // necessary (i.e., `replace_all` returns a `Borrowed`). | ||
| 109 | while let Cow::Owned(step) = MACRO.replace_all(&code, |caps: &Captures| { | ||
| 110 | let block = if d < depth { | ||
| 111 | blocks.get(&caps[2]).expect("Block not present").clone() | ||
| 112 | } else { | ||
| 113 | eprintln!("Reached maximum depth, output might be truncated. Increase `--depth` accordingly."); | ||
| 114 | Cow::Owned(String::from("")) | ||
| 115 | }; | ||
| 116 | indent(block, caps[1].len()) | ||
| 117 | }) { | ||
| 118 | code = Cow::from(step); | ||
| 119 | d += 1; | ||
| 120 | } | ||
| 121 | write_to_file(base, k, &code).expect("Unable to write to file"); | ||
| 122 | }) | ||
| 123 | } | ||
| 124 | |||
| 125 | fn main() -> Result<()> { | ||
| 126 | let config = Config::parse(); | ||
| 127 | let mut pandoc = Pandoc::new(); | ||
| 128 | /* Pandoc input setup */ | ||
| 129 | pandoc.set_input(InputKind::Files(config.input)); | ||
| 130 | pandoc.set_input_format(InputFormat::Markdown, vec![]); | ||
| 131 | /* Pandoc output setup */ | ||
| 132 | pandoc.set_output(OutputKind::Pipe); | ||
| 133 | pandoc.set_output_format(OutputFormat::Json, vec![]); | ||
| 134 | /* Process literate program */ | ||
| 135 | pandoc.add_filter(move |json| pandoc_ast::filter(json, |pandoc| { | ||
| 136 | let mut blocks: Blocks = HashMap::new(); | ||
| 137 | pandoc.blocks.iter().for_each(|block| | ||
| 138 | if let Block::CodeBlock((id,classes,attrs), code) = block { | ||
| 139 | /* Only process blocks with an ID */ | ||
| 140 | if !id.is_empty() { | ||
| 141 | let key = { | ||
| 142 | if let Some(path) = attrs.iter().find(|(k,_)| k == "path") { | ||
| 143 | format!("{}{}", path.1, id) | ||
| 144 | } else { | ||
| 145 | id.to_string() | ||
| 146 | } | ||
| 147 | }; | ||
| 148 | /* Insert (or replace) block of code. In case of ID clash, the standard | ||
| 149 | * behaviour is to append the new code to the existing snippet. Use the class | ||
| 150 | * `.override` to override the previously encountered snippets. | ||
| 151 | */ | ||
| 152 | if classes.iter().any(|c| c == "override") { | ||
| 153 | blocks.insert(key, Cow::from(code)); | ||
| 154 | } else { | ||
| 155 | blocks.entry(key) | ||
| 156 | .and_modify(|s| { *s += "\n"; *s += Cow::from(code) }) | ||
| 157 | .or_insert(Cow::from(code)); | ||
| 158 | } | ||
| 159 | } else { | ||
| 160 | eprintln!("Ignoring codeblock without ID:"); | ||
| 161 | eprintln!("{}", indent(Cow::from(code),4)); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | ); | ||
| 165 | build(&config.output, &blocks, config.depth); | ||
| 166 | pandoc | ||
| 167 | })); | ||
| 168 | pandoc.execute().unwrap(); | ||
| 169 | Ok(()) | ||
| 170 | } | ||
