noop-tokenize.rs

// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Run a single benchmark once. For use with profiling tools.

extern crate html5ever;

use std::cell::RefCell;
use std::io;

use html5ever::tendril::*;
use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer};
use markup5ever::buffer_queue::BufferQueue;

/// In our case, our sink only contains a tokens vector
struct Sink(RefCell<Vec<Token>>);

impl TokenSink for Sink {
    type Handle = ();

    /// Each processed token will be handled by this method
    fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
        self.0.borrow_mut().push(token);
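        // Returning Continue tells the tokenizer to carry on with the next token;
        // other TokenSinkResult variants let a sink switch the tokenizer's state
        // (for example around <script> content).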
        TokenSinkResult::Continue
    }
}

/// In this example we implement the TokenSink trait, which lets us define how each
/// parsed token is handled. Here we simply push every token into a vector.
fn main() {
    // Read HTML from standard input
    let mut chunk = ByteTendril::new();
    io::stdin().read_to_tendril(&mut chunk).unwrap();
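
    // The tokenizer consumes its input from a BufferQueue of UTF-8 string
    // tendrils; try_reinterpret converts the raw bytes we read into one,
    // failing if the input is not valid UTF-8.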
    let input = BufferQueue::default();
    input.push_back(chunk.try_reinterpret().unwrap());
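
    // Create the tokenizer with our Sink and default options, drive it over the
    // queued input, and finish; end() flushes any state still buffered in the tokenizer.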
    let tok = Tokenizer::new(Sink(RefCell::new(Vec::new())), Default::default());
    let _ = tok.feed(&input);
    assert!(input.is_empty());
    tok.end();
}
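
Assuming this file sits at examples/noop-tokenize.rs in a checkout of the html5ever repository (as the path above suggests), it can be run against a local HTML file with something like:

    cargo run --example noop-tokenize < page.html

The sink deliberately does nothing with the collected tokens, which makes a single run a reasonable target for profiling the tokenizer itself.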