about | summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Andrew Kelley <andrew@ziglang.org>, 2024-03-25 15:36:37 -0700
committer: GitHub <noreply@github.com>, 2024-03-25 15:36:37 -0700
commit: abadad464090a897813e35539d669f707ea3a8b4 (patch)
tree: 50f8857955f5a66868f10318618a928c45c82f35 /lib
parent: 32b4d85605c9bdde13597a83116e5940be48a5be (diff)
parent: ad34ed5a63ef912fba5232806a1adea6ea55181b (diff)
download: zig-abadad464090a897813e35539d669f707ea3a8b4.tar.gz
zig-abadad464090a897813e35539d669f707ea3a8b4.zip
Merge pull request #19402 from ianprime0509/markdown-autolinks
Autodoc: hyperlink URLs in text
Diffstat (limited to 'lib')
-rw-r--r-- lib/docs/wasm/markdown.zig 55
-rw-r--r-- lib/docs/wasm/markdown/Document.zig 2
-rw-r--r-- lib/docs/wasm/markdown/Parser.zig 159
-rw-r--r-- lib/docs/wasm/markdown/renderer.zig 6
4 files changed, 221 insertions, 1 deletions
diff --git a/lib/docs/wasm/markdown.zig b/lib/docs/wasm/markdown.zig
index 4ce1ee15b4..e0bf4bbaac 100644
--- a/lib/docs/wasm/markdown.zig
+++ b/lib/docs/wasm/markdown.zig
@@ -75,6 +75,17 @@
//! content. `target` may contain `\`-escaped characters and balanced
//! parentheses.
//!
+//! - **Autolink** - an abbreviated link, of the format `<target>`, where
+//! `target` serves as both the link target and text. `target` may not
+//! contain spaces or `<`, and any `\` in it are interpreted literally (not as
+//! escapes). `target` is expected to be an absolute URI: an autolink will not
+//! be recognized unless `target` starts with a URI scheme followed by a `:`.
+//!
+//! For convenience, autolinks may also be recognized in plain text without
+//! any `<>` delimiters. Such autolinks must start with `http://` or `https://`
+//! and contain at least one further character. Trailing punctuation after the
+//! link is not treated as part of it and does not count toward that minimum.
+//!
//! - **Image** - a link directly preceded by a `!`. The link text is
//! interpreted as the alt text of the image.
//!
@@ -710,6 +721,50 @@ test "links" {
);
}
+test "autolinks" { // Covers `<...>`-delimited autolinks, plus `<`/`>` text that must stay literal.
+ try testRender( // NOTE(review): presumably (markdown input, expected HTML); `testRender` is defined outside this hunk.
+ \\<https://example.com>
+ \\**This is important: <https://example.com/strong>**
+ \\<https://example.com?query=abc.123#page(parens)>
+ \\<placeholder>
+ \\<data:>
+ \\1 < 2
+ \\4 > 3
+ \\Unclosed: <
+ \\
+ ,
+ \\<p><a href="https://example.com">https://example.com</a>
+ \\<strong>This is important: <a href="https://example.com/strong">https://example.com/strong</a></strong>
+ \\<a href="https://example.com?query=abc.123#page(parens)">https://example.com?query=abc.123#page(parens)</a>
+ \\&lt;placeholder&gt;
+ \\<a href="data:">data:</a>
+ \\1 &lt; 2
+ \\4 &gt; 3
+ \\Unclosed: &lt;</p>
+ \\
+ );
+}
+
+test "text autolinks" { // Covers undelimited `http://`/`https://` links: other schemes, trailing punctuation, parentheses, and `\:` escaping.
+ try testRender( // NOTE(review): presumably (markdown input, expected HTML); `testRender` is defined outside this hunk.
+ \\Text autolinks must start with http:// or https://.
+ \\This doesn't count: ftp://example.com.
+ \\Example: https://ziglang.org.
+ \\Here is an important link: **http://example.com**
+ \\(Links may be in parentheses: https://example.com/?q=(parens))
+ \\Escaping a link so it's plain text: https\://example.com
+ \\
+ ,
+ \\<p>Text autolinks must start with http:// or https://.
+ \\This doesn't count: ftp://example.com.
+ \\Example: <a href="https://ziglang.org">https://ziglang.org</a>.
+ \\Here is an important link: <strong><a href="http://example.com">http://example.com</a></strong>
+ \\(Links may be in parentheses: <a href="https://example.com/?q=(parens)">https://example.com/?q=(parens)</a>)
+ \\Escaping a link so it's plain text: https://example.com</p>
+ \\
+ );
+}
+
test "images" {
try testRender(
\\![Alt text](https://example.com/image.png)
diff --git a/lib/docs/wasm/markdown/Document.zig b/lib/docs/wasm/markdown/Document.zig
index 9e43e35795..f3c0fdeed0 100644
--- a/lib/docs/wasm/markdown/Document.zig
+++ b/lib/docs/wasm/markdown/Document.zig
@@ -51,6 +51,8 @@ pub const Node = struct {
// Inlines
/// Data is `link`.
link,
+ /// Data is `text`.
+ autolink,
/// Data is `link`.
image,
/// Data is `container`.
diff --git a/lib/docs/wasm/markdown/Parser.zig b/lib/docs/wasm/markdown/Parser.zig
index 7cee596746..9b377dce34 100644
--- a/lib/docs/wasm/markdown/Parser.zig
+++ b/lib/docs/wasm/markdown/Parser.zig
@@ -985,8 +985,12 @@ const InlineParser = struct {
ip.pos += 1;
},
']' => try ip.parseLink(),
+ '<' => try ip.parseAutolink(),
'*', '_' => try ip.parseEmphasis(),
'`' => try ip.parseCodeSpan(),
+ 'h' => if (ip.pos == 0 or isPreTextAutolink(ip.content[ip.pos - 1])) {
+ try ip.parseTextAutolink();
+ },
else => {},
}
}
@@ -1076,6 +1080,161 @@ const InlineParser = struct {
return @enumFromInt(string_top);
}
+ /// Parses an autolink of the form `<scheme:rest>`, starting at the opening `<`. On success, an `autolink` node is appended to `ip.completed_inlines` and `ip.pos` is left at the
+ /// closing `>`. If no valid autolink is found (bad scheme, forbidden character, or no closing `>`), `ip.pos` remains unchanged at the opening `<`.
+ fn parseAutolink(ip: *InlineParser) !void {
+ const start = ip.pos; // Index of the opening `<`.
+ ip.pos += 1; // Step past the `<` to the first candidate scheme character.
+ var state: enum {
+ start, // Expecting the first character of the scheme (an ASCII letter).
+ scheme, // Inside the scheme, before the `:`.
+ target, // After the `:`, scanning for the closing `>`.
+ } = .start;
+ while (ip.pos < ip.content.len) : (ip.pos += 1) {
+ switch (state) {
+ .start => switch (ip.content[ip.pos]) {
+ 'A'...'Z', 'a'...'z' => state = .scheme,
+ else => break, // Not a scheme start: leave the loop and rewind below.
+ },
+ .scheme => switch (ip.content[ip.pos]) {
+ 'A'...'Z', 'a'...'z', '0'...'9', '+', '.', '-' => {},
+ ':' => state = .target,
+ else => break, // Character not accepted in a scheme: not an autolink.
+ },
+ .target => switch (ip.content[ip.pos]) {
+ '<', ' ', '\t', '\n' => break, // Not allowed in autolinks
+ '>' => {
+ // Backslash escapes are not recognized in autolink targets: the raw text between `<` and `>` is stored as-is.
+ const target = try ip.parent.addString(ip.content[start + 1 .. ip.pos]);
+ const node = try ip.parent.addNode(.{
+ .tag = .autolink,
+ .data = .{ .text = .{
+ .content = target,
+ } },
+ });
+ try ip.completed_inlines.append(ip.parent.allocator, .{
+ .node = node,
+ .start = start,
+ .len = ip.pos - start + 1, // Span includes both the `<` and the `>`.
+ });
+ return; // `ip.pos` stays on the closing `>`.
+ },
+ else => {},
+ },
+ }
+ }
+ ip.pos = start; // No autolink recognized: rewind to the opening `<`.
+ }
+
+ /// Parses a plain text autolink (not delimited by `<>`), starting at the
+ /// first character in the link (an `h`). The link must begin with `http://` or `https://`; on success an `autolink` node is appended to `ip.completed_inlines` and `ip.pos` is left at the last
+ /// character of the link. `ip.pos` remains unchanged if there is no valid link (prefix mismatch, or nothing left after `://` once trailing punctuation is dropped).
+ fn parseTextAutolink(ip: *InlineParser) !void {
+ const start = ip.pos; // Index of the leading `h`.
+ var state: union(enum) {
+ /// Inside `http`. Contains the rest of the text to be matched.
+ http: []const u8,
+ after_http, // Matched `http`; the next character must be `s` or `:`.
+ after_https, // Matched `https`; the next character must be `:`.
+ /// Inside `://`. Contains the rest of the text to be matched.
+ authority: []const u8,
+ /// Inside link content.
+ content: struct {
+ start: usize, // Index of the first character after `://`.
+ paren_nesting: usize, // Number of `(` seen in the link that have not yet been closed by `)`.
+ },
+ } = .{ .http = "http" };
+
+ while (ip.pos < ip.content.len) : (ip.pos += 1) {
+ switch (state) {
+ .http => |rest| {
+ if (ip.content[ip.pos] != rest[0]) break; // Prefix mismatch: not a link.
+ if (rest.len > 1) {
+ state = .{ .http = rest[1..] };
+ } else {
+ state = .after_http;
+ }
+ },
+ .after_http => switch (ip.content[ip.pos]) {
+ 's' => state = .after_https,
+ ':' => state = .{ .authority = "//" },
+ else => break,
+ },
+ .after_https => switch (ip.content[ip.pos]) {
+ ':' => state = .{ .authority = "//" },
+ else => break,
+ },
+ .authority => |rest| {
+ if (ip.content[ip.pos] != rest[0]) break; // Expected `/` here: not a link.
+ if (rest.len > 1) {
+ state = .{ .authority = rest[1..] };
+ } else {
+ state = .{ .content = .{
+ .start = ip.pos + 1,
+ .paren_nesting = 0,
+ } };
+ }
+ },
+ .content => |*content| switch (ip.content[ip.pos]) {
+ ' ', '\t', '\n' => break, // Whitespace ends the link.
+ '(' => content.paren_nesting += 1,
+ ')' => if (content.paren_nesting == 0) {
+ break; // Unbalanced `)` is not part of the link; it ends the link.
+ } else {
+ content.paren_nesting -= 1;
+ },
+ else => {},
+ },
+ }
+ }
+
+ switch (state) {
+ .http, .after_http, .after_https, .authority => { // Never got past the `://` prefix.
+ ip.pos = start;
+ },
+ .content => |content| {
+ while (ip.pos > content.start and isPostTextAutolink(ip.content[ip.pos - 1])) { // Drop trailing punctuation from the link.
+ ip.pos -= 1;
+ }
+ if (ip.pos == content.start) { // Nothing remains after `://`: not a link.
+ ip.pos = start;
+ return;
+ }
+
+ const target = try ip.parent.addString(ip.content[start..ip.pos]);
+ const node = try ip.parent.addNode(.{
+ .tag = .autolink,
+ .data = .{ .text = .{
+ .content = target,
+ } },
+ });
+ try ip.completed_inlines.append(ip.parent.allocator, .{
+ .node = node,
+ .start = start,
+ .len = ip.pos - start, // `ip.pos` is one past the last link character at this point.
+ });
+ ip.pos -= 1; // Leave `ip.pos` on the last character of the link.
+ },
+ }
+ }
+
+ /// Returns whether `c` may appear directly before a text autolink for it to be recognized: a space, tab, newline, `*`, `_`, or `(`.
+ fn isPreTextAutolink(c: u8) bool {
+ return switch (c) {
+ ' ', '\t', '\n', '*', '_', '(' => true,
+ else => false,
+ };
+ }
+
+ /// Returns whether `c` is punctuation that may appear after a text autolink
+ /// and not be considered part of it: `?`, `!`, `.`, `,`, `:`, `*`, or `_`.
+ fn isPostTextAutolink(c: u8) bool {
+ return switch (c) {
+ '?', '!', '.', ',', ':', '*', '_' => true,
+ else => false,
+ };
+ }
+
/// Parses emphasis, starting at the beginning of a run of `*` or `_`
/// characters. `ip.pos` is left at the last character in the run after
/// parsing.
diff --git a/lib/docs/wasm/markdown/renderer.zig b/lib/docs/wasm/markdown/renderer.zig
index fd361a379e..1e6041399a 100644
--- a/lib/docs/wasm/markdown/renderer.zig
+++ b/lib/docs/wasm/markdown/renderer.zig
@@ -140,6 +140,10 @@ pub fn Renderer(comptime Writer: type, comptime Context: type) type {
}
try writer.writeAll("</a>");
},
+ .autolink => {
+ const target = doc.string(data.text.content);
+ try writer.print("<a href=\"{0}\">{0}</a>", .{fmtHtml(target)});
+ },
.image => {
const target = doc.string(data.link.target);
try writer.print("<img src=\"{}\" alt=\"", .{fmtHtml(target)});
@@ -215,7 +219,7 @@ pub fn renderInlineNodeText(
try renderInlineNodeText(doc, child, writer);
}
},
- .code_span, .text => {
+ .autolink, .code_span, .text => {
const content = doc.string(data.text.content);
try writer.print("{}", .{fmtHtml(content)});
},