Draft incremental markdown parsing

Especially useful when dealing with long Markdown
streams, like those produced by LLMs.
This commit is contained in:
Héctor Ramón Jiménez 2025-01-31 17:35:38 +01:00
parent 6aab76e3a0
commit 128058ea94
No known key found for this signature in database
GPG key ID: 7CC46565708259A7
3 changed files with 167 additions and 28 deletions

View file

@ -7,6 +7,6 @@ publish = false
[dependencies]
iced.workspace = true
iced.features = ["markdown", "highlighter", "debug"]
iced.features = ["markdown", "highlighter", "tokio", "debug"]
open = "5.3"

View file

@ -1,23 +1,37 @@
use iced::highlighter;
use iced::widget::{self, markdown, row, scrollable, text_editor};
use iced::{Element, Fill, Font, Task, Theme};
use iced::time::{self, milliseconds};
use iced::widget::{
self, hover, markdown, right, row, scrollable, text_editor, toggler,
};
use iced::{Element, Fill, Font, Subscription, Task, Theme};
/// Launches the Markdown example application.
///
/// The application is assembled from `Markdown::update` and `Markdown::view`,
/// wired to `Markdown::subscription` and `Markdown::theme`, and started from
/// the state returned by `Markdown::new`.
pub fn main() -> iced::Result {
    let application =
        iced::application("Markdown - Iced", Markdown::update, Markdown::view)
            .subscription(Markdown::subscription)
            .theme(Markdown::theme);

    application.run_with(Markdown::new)
}
/// State of the Markdown example application.
struct Markdown {
/// Contents of the text editor holding the Markdown source.
content: text_editor::Content,
// NOTE(review): this listing shows removed and added diff lines side by
// side; `items` looks like the pre-change field that `mode` supersedes —
// confirm against the applied file.
items: Vec<markdown::Item>,
/// How the preview is currently produced (one-shot or streamed).
mode: Mode,
/// Theme returned to the runtime by `Markdown::theme`.
theme: Theme,
}
/// How the Markdown preview is produced.
enum Mode {
    /// The editor text was parsed in a single pass; holds the resulting items.
    Oneshot(Vec<markdown::Item>),
    /// The editor text is fed to the parser piece by piece.
    Stream {
        /// Text that has not been handed to the parser yet.
        pending: String,
        /// Content parsed so far from the text already consumed.
        parsed: markdown::Content,
    },
}
/// Messages handled by `Markdown::update`.
#[derive(Debug, Clone)]
enum Message {
    /// An action to perform on the text editor.
    Edit(text_editor::Action),
    /// A link was clicked in the rendered preview.
    LinkClicked(markdown::Url),
    /// The "Stream" toggler changed; carries the requested state.
    ToggleStream(bool),
    /// Timer tick requesting that the next pending token be parsed.
    NextToken,
}
impl Markdown {
@ -29,7 +43,7 @@ impl Markdown {
(
Self {
content: text_editor::Content::with_text(INITIAL_CONTENT),
items: markdown::parse(INITIAL_CONTENT).collect(),
mode: Mode::Oneshot(markdown::parse(INITIAL_CONTENT).collect()),
theme,
},
widget::focus_next(),
@ -44,13 +58,48 @@ impl Markdown {
self.content.perform(action);
if is_edit {
self.items =
markdown::parse(&self.content.text()).collect();
self.mode = match self.mode {
Mode::Oneshot(_) => Mode::Oneshot(
markdown::parse(&self.content.text()).collect(),
),
Mode::Stream { .. } => Mode::Stream {
pending: self.content.text(),
parsed: markdown::Content::parse(""),
},
}
}
}
Message::LinkClicked(link) => {
let _ = open::that_in_background(link.to_string());
}
Message::ToggleStream(enable_stream) => {
self.mode = if enable_stream {
Mode::Stream {
pending: self.content.text(),
parsed: markdown::Content::parse(""),
}
} else {
Mode::Oneshot(
markdown::parse(&self.content.text()).collect(),
)
};
}
Message::NextToken => match &mut self.mode {
Mode::Oneshot(_) => {}
Mode::Stream { pending, parsed } => {
if pending.is_empty() {
self.mode = Mode::Oneshot(parsed.items().to_vec());
} else {
let mut tokens = pending.split(' ');
if let Some(token) = tokens.next() {
parsed.push_str(&format!("{token} "));
}
*pending = tokens.collect::<Vec<_>>().join(" ");
}
}
},
}
}
@ -63,20 +112,45 @@ impl Markdown {
.font(Font::MONOSPACE)
.highlight("markdown", highlighter::Theme::Base16Ocean);
let items = match &self.mode {
Mode::Oneshot(items) => items.as_slice(),
Mode::Stream { parsed, .. } => parsed.items(),
};
let preview = markdown(
&self.items,
items,
markdown::Settings::default(),
markdown::Style::from_palette(self.theme.palette()),
)
.map(Message::LinkClicked);
row![editor, scrollable(preview).spacing(10).height(Fill)]
.spacing(10)
.padding(10)
.into()
row![
editor,
hover(
scrollable(preview).spacing(10).width(Fill).height(Fill),
right(
toggler(matches!(self.mode, Mode::Stream { .. }))
.label("Stream")
.on_toggle(Message::ToggleStream)
)
.padding([0, 20])
)
]
.spacing(10)
.padding(10)
.into()
}
fn theme(&self) -> Theme {
self.theme.clone()
}
/// Emits `Message::NextToken` every 20 milliseconds while streaming.
///
/// No subscription is active in one-shot mode.
fn subscription(&self) -> Subscription<Message> {
    match &self.mode {
        Mode::Stream { .. } => {
            let ticks = time::every(milliseconds(20));

            ticks.map(|_| Message::NextToken)
        }
        Mode::Oneshot(_) => Subscription::none(),
    }
}
}

View file

@ -47,6 +47,7 @@
//! }
//! }
//! ```
#![allow(missing_docs)]
use crate::core::border;
use crate::core::font::{self, Font};
use crate::core::padding;
@ -57,12 +58,47 @@ use crate::core::{
use crate::{column, container, rich_text, row, scrollable, span, text};
use std::cell::{Cell, RefCell};
use std::ops::Range;
use std::sync::Arc;
pub use core::text::Highlight;
pub use pulldown_cmark::HeadingLevel;
pub use url::Url;
/// Markdown content that can be parsed incrementally.
///
/// Holds the [`Item`]s produced so far together with the parser [`State`]
/// that is reused when more text is appended.
#[derive(Debug, Clone)]
pub struct Content {
    /// Items parsed so far.
    items: Vec<Item>,
    /// Parser state carried over between parsing passes.
    state: State,
}
impl Content {
    /// Parses `markdown` from scratch.
    ///
    /// The parser state is kept alongside the produced items so that more
    /// text can be appended later with [`Content::push_str`].
    pub fn parse(markdown: &str) -> Self {
        let mut state = State::default();
        let items = parse_with(&mut state, markdown).collect();

        Self { items, state }
    }

    /// Appends `markdown` to the content and re-parses only its tail.
    ///
    /// The new text is joined to the leftover source kept in the parser
    /// state, the last item is discarded, and the joined text is parsed
    /// again so that the last item can grow or change with the new input.
    pub fn push_str(&mut self, markdown: &str) {
        // Nothing to append: avoid popping and re-parsing the last item.
        if markdown.is_empty() {
            return;
        }

        // Append to last leftover text
        let mut leftover = std::mem::take(&mut self.state.leftover);
        leftover.push_str(markdown);

        // Pop the last item; it is re-created from `leftover` below
        let _ = self.items.pop();

        // Re-parse last item and new text
        let new_items = parse_with(&mut self.state, &leftover);
        self.items.extend(new_items);
    }

    /// Returns the items parsed so far.
    pub fn items(&self) -> &[Item] {
        &self.items
    }
}
/// A Markdown item.
#[derive(Debug, Clone)]
pub enum Item {
@ -232,6 +268,24 @@ impl Span {
/// }
/// ```
pub fn parse(markdown: &str) -> impl Iterator<Item = Item> + '_ {
    // One-shot parsing starts from a fresh state owned by the iterator.
    let state = State::default();

    parse_with(state, markdown)
}
/// Parser state preserved between incremental parsing passes.
#[derive(Debug, Clone, Default)]
pub struct State {
    /// Source text starting at the most recently produced top-level item;
    /// it is parsed again together with any text appended afterwards.
    leftover: String,
}
impl AsMut<Self> for State {
fn as_mut(&mut self) -> &mut Self {
self
}
}
fn parse_with<'a>(
mut state: impl AsMut<State> + 'a,
markdown: &'a str,
) -> impl Iterator<Item = Item> + 'a {
struct List {
start: Option<u64>,
items: Vec<Vec<Item>>,
@ -255,27 +309,31 @@ pub fn parse(markdown: &str) -> impl Iterator<Item = Item> + '_ {
| pulldown_cmark::Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS
| pulldown_cmark::Options::ENABLE_TABLES
| pulldown_cmark::Options::ENABLE_STRIKETHROUGH,
);
)
.into_offset_iter();
let produce = |lists: &mut Vec<List>, item| {
if lists.is_empty() {
Some(item)
} else {
lists
.last_mut()
.expect("list context")
.items
.last_mut()
.expect("item context")
.push(item);
let mut produce =
move |lists: &mut Vec<List>, item, source: Range<usize>| {
if lists.is_empty() {
state.as_mut().leftover = markdown[source.start..].to_owned();
None
}
};
Some(item)
} else {
lists
.last_mut()
.expect("list context")
.items
.last_mut()
.expect("item context")
.push(item);
None
}
};
// We want to keep the `spans` capacity
#[allow(clippy::drain_collect)]
parser.filter_map(move |event| match event {
parser.filter_map(move |(event, source)| match event {
pulldown_cmark::Event::Start(tag) => match tag {
pulldown_cmark::Tag::Strong if !metadata && !table => {
strong = true;
@ -311,6 +369,7 @@ pub fn parse(markdown: &str) -> impl Iterator<Item = Item> + '_ {
produce(
&mut lists,
Item::Paragraph(Text::new(spans.drain(..).collect())),
source,
)
};
@ -350,6 +409,7 @@ pub fn parse(markdown: &str) -> impl Iterator<Item = Item> + '_ {
produce(
&mut lists,
Item::Paragraph(Text::new(spans.drain(..).collect())),
source,
)
};
@ -370,6 +430,7 @@ pub fn parse(markdown: &str) -> impl Iterator<Item = Item> + '_ {
produce(
&mut lists,
Item::Heading(level, Text::new(spans.drain(..).collect())),
source,
)
}
pulldown_cmark::TagEnd::Strong if !metadata && !table => {
@ -392,6 +453,7 @@ pub fn parse(markdown: &str) -> impl Iterator<Item = Item> + '_ {
produce(
&mut lists,
Item::Paragraph(Text::new(spans.drain(..).collect())),
source,
)
}
pulldown_cmark::TagEnd::Item if !metadata && !table => {
@ -401,6 +463,7 @@ pub fn parse(markdown: &str) -> impl Iterator<Item = Item> + '_ {
produce(
&mut lists,
Item::Paragraph(Text::new(spans.drain(..).collect())),
source,
)
}
}
@ -413,6 +476,7 @@ pub fn parse(markdown: &str) -> impl Iterator<Item = Item> + '_ {
start: list.start,
items: list.items,
},
source,
)
}
pulldown_cmark::TagEnd::CodeBlock if !metadata && !table => {
@ -424,6 +488,7 @@ pub fn parse(markdown: &str) -> impl Iterator<Item = Item> + '_ {
produce(
&mut lists,
Item::CodeBlock(Text::new(spans.drain(..).collect())),
source,
)
}
pulldown_cmark::TagEnd::MetadataBlock(_) => {