aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Federico Igne <git@federicoigne.com> 2022-08-28 17:07:34 +0100
committer Federico Igne <git@federicoigne.com> 2022-08-28 17:07:34 +0100
commit 76328b1334e1429e1a2719af2d041ac5fa86dd0e (patch)
tree a2f4145bd9e242a20f1f70993ef28bac24fc3e00
parent 3f0792e3fb1a17d99147bf2cb8ac8d2442d2cdb6 (diff)
download pangler-76328b1334e1429e1a2719af2d041ac5fa86dd0e.tar.gz
pangler-76328b1334e1429e1a2719af2d041ac5fa86dd0e.zip
feat(source): provide tangled code alongside the literate program (tag: v0.3.0)
On the one hand this defeats the purpose of having a literate program; on the other, it avoids the awkward "bootstrapping" experience.
-rw-r--r-- Cargo.toml 11
-rw-r--r-- README.md 16
-rwxr-xr-x bin/pangler-v0.1.0 bin 3588064 -> 0 bytes
-rw-r--r-- src/main.rs 165
-rw-r--r-- v0.1.0/Cargo.toml 14
-rw-r--r-- v0.1.0/src/main.rs 170
6 files changed, 365 insertions, 11 deletions
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..644ee68
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,11 @@
1[package]
2name = "pangler"
3version = "0.3.0"
4edition = "2021"
5
6[dependencies]
7clap = { version = "3.1", features = ["derive"] }
8pandoc = "0.8"
9pandoc_ast = "0.8"
10lazy_static = "1.4"
11regex = "1.5" \ No newline at end of file
diff --git a/README.md b/README.md
index af555c8..69ca732 100644
--- a/README.md
+++ b/README.md
@@ -124,21 +124,15 @@ if clss.iter().any(|c| c == "override") {
124 124
125## Tangling: generating the source files 125## Tangling: generating the source files
126 126
127To bootstrap the tangling process, an early version of `pangler` is provided under `bin/` in this repository. 127To bootstrap the tangling process, a tangled version of the program is provided alongside the literate version.
128 128
129You can generate the code for the current version of the program, in the current working directory, with 129The executable can be compiled from the root of the project with
130
131```sh
132./bin/pangler-v0.1.0 README.md
133```
134
135and compile it with
136 130
137```sh 131```sh
138cargo build --release 132cargo build --release
139``` 133```
140 134
141From now on you can make changes to the `README.md` file and use the latest version of `pangler` to tangle and compile it. 135From now on you can make changes to the `README.md` file and use your latest compiled version of `pangler` to tangle and compile it.
142 136
143## Weaving: generating the documentation 137## Weaving: generating the documentation
144 138
@@ -565,12 +559,12 @@ fn write_to_file(
565 559
566# Credits 560# Credits
567 561
568`pangler v0.2.0` was created by Federico Igne (git@federicoigne.com) and available at [`https://git.dyamon.me/projects/pangler`](https://git.dyamon.me/projects/pangler). 562`pangler` was created by Federico Igne (git@federicoigne.com) and available at [`https://git.dyamon.me/projects/pangler`](https://git.dyamon.me/projects/pangler).
569 563
570```{#Cargo.toml .toml} 564```{#Cargo.toml .toml}
571[package] 565[package]
572name = "pangler" 566name = "pangler"
573version = "0.2.0" 567version = "0.3.0"
574edition = "2021" 568edition = "2021"
575 569
576[dependencies] 570[dependencies]
diff --git a/bin/pangler-v0.1.0 b/bin/pangler-v0.1.0
deleted file mode 100755
index fd07616..0000000
--- a/bin/pangler-v0.1.0
+++ /dev/null
Binary files differ
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..02fbcd4
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,165 @@
1use clap::Parser;
2use pandoc::{
3 InputFormat,InputKind,OutputFormat,OutputKind,Pandoc
4};
5use pandoc_ast::Block;
6use std::borrow::Cow;
7use std::collections::HashMap;
8use lazy_static::lazy_static;
9use regex::{Captures,Regex};
10use std::fs;
11use std::io::Result;
12use std::path::PathBuf;
13
14const BASE: &str = "./";
15
16/// A tangler for Literate Programming in Pandoc
17#[derive(Parser, Debug)]
18#[clap(author, version, about, long_about = None)]
19struct Config {
20 /// Maximum substitution depth
21 #[clap(short, long, default_value_t = 10)]
22 depth: u32,
23 /// Base output directory [default: './']
24 #[clap(short, long)]
25 output: Option<PathBuf>,
26 /// Input files
27 input: Vec<PathBuf>,
28}
29
30type Blocks<'a> = HashMap<String,Cow<'a,str>>;
31
32fn build(
33 base: &Option<PathBuf>,
34 blocks: &Blocks,
35 max_depth: u32
36) {
37 lazy_static! {
38 static ref PATH: Regex =
39 Regex::new(
40 r"^(?:[[:word:]\.-]+/)*[[:word:]\.-]+\.[[:alpha:]]+$"
41 ).unwrap();
42 static ref MACRO: Regex =
43 Regex::new(
44 r"(?m)^([[:blank:]]*)<<([^>\s]+)>>"
45 ).unwrap();
46 }
47 blocks
48 .iter()
49 .for_each(|(path,code)| if PATH.is_match(path) {
50 let mut current_depth = 0;
51 let mut code = code.clone();
52 while let Cow::Owned(new_code) = MACRO.replace_all(
53 &code,
54 |caps: &Captures| {
55 if current_depth < max_depth {
56 let block = blocks
57 .get(&caps[2])
58 .expect("Block not present")
59 .clone();
60 indent(block, caps[1].len())
61 } else {
62 eprintln!("Reached maximum depth, \
63 output might be truncated.\n\
64 Increase `--depth` accordingly.");
65 Cow::Owned(String::from(""))
66 }
67 }
68 ) {
69 code = Cow::from(new_code);
70 current_depth += 1;
71 }
72 let file = base
73 .clone()
74 .unwrap_or(PathBuf::from(BASE))
75 .join(path);
76 write_to_file(file, &code)
77 .expect("Unable to write to file");
78 })
79}
80
/// Shifts every non-empty line of `input` right by `indent` spaces.
///
/// With `indent == 0` the input is handed back untouched. Otherwise the text
/// is rebuilt line by line: lines are joined with `'\n'`, empty lines stay
/// empty, and a trailing line terminator is not reproduced.
fn indent<'a>(
    input: Cow<'a,str>,
    indent: usize
) -> Cow<'a,str> {
    if indent == 0 {
        return input;
    }
    let padding = " ".repeat(indent);
    // Upper bound: the original text plus one prefix per line.
    let capacity = input.len() + indent * input.lines().count();
    let mut shifted = String::with_capacity(capacity);
    for (index, line) in input.lines().enumerate() {
        if index != 0 {
            shifted.push('\n');
        }
        if line.is_empty() {
            continue;
        }
        shifted += &padding;
        shifted += line;
    }
    Cow::Owned(shifted)
}
103
/// Writes `content` to `path`, creating missing parent directories.
///
/// Absolute paths are not written: a message is printed to stderr and
/// `Ok(())` is returned.
///
/// NOTE(review): `build` joins the base directory before calling this
/// function, so an absolute `--output` directory is rejected here as well —
/// confirm whether that is intended.
///
/// # Errors
///
/// Returns any I/O error raised while creating the directories or writing
/// the file. A path without a parent (e.g. the empty path) no longer panics;
/// the error from the write itself is returned instead.
fn write_to_file(
    path: PathBuf, content: &str
) -> std::io::Result<()> {
    if !path.is_relative() {
        eprintln!(
            "Absolute paths not supported: {}",
            path.to_string_lossy()
        );
        return Ok(());
    }
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent)?;
    }
    fs::write(path, content)
}
118
119
120fn main() -> Result<()> {
121 let config = Config::parse();
122 let mut pandoc = Pandoc::new();
123 pandoc.set_input(InputKind::Files(config.input));
124 pandoc.set_input_format(InputFormat::Markdown, vec![]);
125 pandoc.set_output(OutputKind::Pipe);
126 pandoc.set_output_format(OutputFormat::Json, vec![]);
127 pandoc.add_filter(
128 move |json| pandoc_ast::filter(json,
129 |pandoc| {
130 let mut blocks: Blocks = HashMap::new();
131 pandoc.blocks.iter().for_each(|block|
132 if let Block::CodeBlock((id,clss,attrs), code) = block {
133 if !id.is_empty() {
134 let key = {
135 let path = attrs.iter().find(|(k,_)| k == "path");
136 if let Some(path) = path {
137 format!("{}{}", path.1, id)
138 } else {
139 id.to_string()
140 }
141 };
142 if clss.iter().any(|c| c == "override") {
143 blocks.insert(key, Cow::from(code));
144 } else {
145 blocks.entry(key)
146 .and_modify(|s| {
147 *s += "\n";
148 *s += Cow::from(code)
149 })
150 .or_insert(Cow::from(code));
151 }
152 } else {
153 eprintln!("Ignoring code block without ID:");
154 eprintln!("{}", indent(Cow::from(code),4));
155 }
156 }
157 );
158 build(&config.output, &blocks, config.depth);
159 pandoc
160 }
161 )
162 );
163 pandoc.execute().unwrap();
164 Ok(())
165} \ No newline at end of file
diff --git a/v0.1.0/Cargo.toml b/v0.1.0/Cargo.toml
new file mode 100644
index 0000000..50660af
--- /dev/null
+++ b/v0.1.0/Cargo.toml
@@ -0,0 +1,14 @@
1[package]
2name = "pangler"
3version = "0.1.0"
4edition = "2021"
5
6# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7
8[dependencies]
9lazy_static = "1.4"
10regex = "1.5"
11pandoc = "0.8"
12pandoc_ast = "0.8"
13clap = { version = "3.1", features = ["derive"] }
14
diff --git a/v0.1.0/src/main.rs b/v0.1.0/src/main.rs
new file mode 100644
index 0000000..ea4e4f7
--- /dev/null
+++ b/v0.1.0/src/main.rs
@@ -0,0 +1,170 @@
1use clap::Parser;
2use lazy_static::lazy_static;
3use pandoc::{InputFormat,InputKind,OutputFormat,OutputKind,Pandoc};
4use pandoc_ast::Block;
5use regex::{Captures,Regex};
6use std::borrow::Cow;
7use std::collections::HashMap;
8use std::fs;
9use std::io::Result;
10use std::path::{Path,PathBuf};
11
12const BASE: &str = "code";
13
14type Blocks<'a> = HashMap<String,Cow<'a,str>>;
15
16/// A tangler for Literate Programming in Pandoc
17#[derive(Parser, Debug)]
18#[clap(author, version, about, long_about = None)]
19struct Config {
20 /// Maximum substitution depth
21 #[clap(short, long, default_value_t = 10)]
22 depth: u32,
23
24 /// Base output directory [default: './code']
25 #[clap(short, long)]
26 output: Option<PathBuf>,
27
28 /// Input files
29 input: Vec<PathBuf>,
30}
31
32/* Write code to target file */
33fn write_to_file<P: AsRef<Path>>(base: &Option<PathBuf>, path: P, content: &str) -> std::io::Result<()> {
34 if path.as_ref().is_relative() {
35 let path = base.clone().unwrap_or(PathBuf::from(BASE)).join(path);
36 /* There is always *at least* the base directory as a parent */
37 fs::create_dir_all(path.parent().unwrap())?;
38 fs::write(path, content)?;
39 } else {
40 eprintln!("Absolute paths not supported: {}", path.as_ref().to_string_lossy())
41 }
42 Ok(())
43}
44
/* Indent a block of code.
 *
 * Every non-empty line is prefixed with `indent` spaces and lines are joined
 * with '\n'; empty lines stay empty and a trailing line terminator is not
 * reproduced. With `indent == 0` the input is returned as is.
 */
fn indent<'a>(input: Cow<'a,str>, indent: usize) -> Cow<'a,str> {
    if indent == 0 {
        return input;
    }
    let padding = " ".repeat(indent);
    let mut output = String::with_capacity(input.len() + indent*input.lines().count());
    // Empty before the first line, a newline before every following one.
    let mut separator = "";
    for line in input.lines() {
        output.push_str(separator);
        separator = "\n";
        if !line.is_empty() {
            output.push_str(&padding);
            output.push_str(line);
        }
    }
    Cow::Owned(output)
}
64
65/*
66 * Here are some notes on the following function
67 *
68 * lazy_static! {
69 * static ref MACRO: Regex = Regex::new(r"regex").unwrap();
70 * }
71 *
72 * let mut text = Cow::from("This is some text...");
73 * while MACRO.is_match(&text) {
74 * text = MACRO.replace_all(&text, _closure);
75 * }
76 *
77 * The problem with this version is that due to how `Cow` works, the value returned by
78 * `replace_all` cannot live more than the borrowed `text` passed as a parameter. This is
79 * because the function returns a reference to `text` (Cow::Borrowed) if no replacement takes
80 * place, so for the returned value to be valid, `text` still needs to be available.
81 * But text gets overridden right away, so, in principle, if no replacement takes place `text`
82 * gets overridden by a reference to it (losing data).
83 *
84 * Note that this doesn't happen in practice (but the compiler doesn't know about this) because
85 * the `replace_all` function is applied as long as some replacement is possible (`while`
86 * condition). In other words, all calls to `replace_all` always return an `Cow::Owned` value.
87 *
88 * This is how you would solve the problem instead:
89 *
90 * while let Cow::Owned(new_text) = MACRO.replace_all(&text, _closure) {
91 * text = Cow::from(new_text);
92 * }
93 *
94 * In this case, the matched `Cow::Owned` is not concerned by any lifetime (type is `Cow<'_,str>`)
95 * of the borrowed value `text`. Moreover `text` takes ownership of `new_text: String` using
96 * the `Cow::from()` function. No heap allocation is performed, and the string is not copied.
97 */
98fn build(base: &Option<PathBuf>, blocks: &Blocks, depth: u32) {
99 lazy_static! {
100 static ref PATH: Regex = Regex::new(r"^(?:[[:word:]\.-]+/)*[[:word:]\.-]+\.[[:alpha:]]+$").unwrap();
101 static ref MACRO: Regex = Regex::new(r"(?m)^([[:blank:]]*)<<([^>\s]+)>>").unwrap();
102 }
103 blocks.iter().for_each(|(k,v)| if PATH.is_match(k) {
104 let mut d = 0;
105 let mut code = v.clone(); // No clone is happening because the value is a `Borrowed`
106 // Here `replace_all` returns a `Owned` value only when a replacement takes place.
107 // We can use it to recursively build blocks of code until no more substitutions are
108 // necessary (i.e., `replace_all` returns a `Borrowed`).
109 while let Cow::Owned(step) = MACRO.replace_all(&code, |caps: &Captures| {
110 let block = if d < depth {
111 blocks.get(&caps[2]).expect("Block not present").clone()
112 } else {
113 eprintln!("Reached maximum depth, output might be truncated. Increase `--depth` accordingly.");
114 Cow::Owned(String::from(""))
115 };
116 indent(block, caps[1].len())
117 }) {
118 code = Cow::from(step);
119 d += 1;
120 }
121 write_to_file(base, k, &code).expect("Unable to write to file");
122 })
123}
124
125fn main() -> Result<()> {
126 let config = Config::parse();
127 let mut pandoc = Pandoc::new();
128 /* Pandoc input setup */
129 pandoc.set_input(InputKind::Files(config.input));
130 pandoc.set_input_format(InputFormat::Markdown, vec![]);
131 /* Pandoc output setup */
132 pandoc.set_output(OutputKind::Pipe);
133 pandoc.set_output_format(OutputFormat::Json, vec![]);
134 /* Process literate program */
135 pandoc.add_filter(move |json| pandoc_ast::filter(json, |pandoc| {
136 let mut blocks: Blocks = HashMap::new();
137 pandoc.blocks.iter().for_each(|block|
138 if let Block::CodeBlock((id,classes,attrs), code) = block {
139 /* Only process blocks with an ID */
140 if !id.is_empty() {
141 let key = {
142 if let Some(path) = attrs.iter().find(|(k,_)| k == "path") {
143 format!("{}{}", path.1, id)
144 } else {
145 id.to_string()
146 }
147 };
148 /* Insert (or replace) block of code. In case of ID clash, the standard
149 * behaviour is to append the new code to the existing snippet. Use the class
150 * `.override` to override the previously encountered snippets.
151 */
152 if classes.iter().any(|c| c == "override") {
153 blocks.insert(key, Cow::from(code));
154 } else {
155 blocks.entry(key)
156 .and_modify(|s| { *s += "\n"; *s += Cow::from(code) })
157 .or_insert(Cow::from(code));
158 }
159 } else {
160 eprintln!("Ignoring codeblock without ID:");
161 eprintln!("{}", indent(Cow::from(code),4));
162 }
163 }
164 );
165 build(&config.output, &blocks, config.depth);
166 pandoc
167 }));
168 pandoc.execute().unwrap();
169 Ok(())
170}