1
/*
2
 * Hurl (https://hurl.dev)
3
 * Copyright (C) 2024 Orange
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 *          http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 */
18
//! Represents a text reader.
19

            
20
/// The `Reader` implements methods to read a stream of text. A reader manages
21
/// an internal `cursor` : it's the current read index position within the reader's internal buffer.
22
///
23
/// Methods like [`Reader::read`], [`Reader::read_while`] do advance the internal reader's `cursor`.
24
/// Other methods, like [`Reader::peek`], [`Reader::peek_n`] allows to get the next chars in the
25
/// buffer without modifying the current reader cursor.
26
///
27
/// The cursor is composed of an offset, which is always related to the reader internal buffer.
28
/// Along the buffer offset, a position [`Pos`] is updated each time a char is read. This position
29
/// corresponds to the column and row index in the buffer document. In most of the case, the
30
/// position is initialized to the first char, but a reader instance can be created using
31
/// [`Reader::with_pos`] to set a given started position. This can be useful when a reader
32
/// is instantiated as a "sub reader" of a given reader, and we want to report position relatively
33
/// to the main reader (for errors but also for constructed structures).
34
///
35
/// # Example
36
/// ```
37
///  use hurl_core::reader::Reader;
38
///
39
///  let mut reader = Reader::new("hi");
40
///  assert_eq!(reader.cursor().index, 0);
41
///  assert!(!reader.is_eof());
42
///  assert_eq!(reader.peek_n(2), "hi".to_string());
43
///  assert_eq!(reader.read(), Some('h'));
44
///  assert_eq!(reader.cursor().index, 1);
45
/// ```
46
#[derive(Clone, Debug, PartialEq, Eq)]
47
pub struct Reader {
48
    buf: Vec<char>,
49
    cursor: Cursor,
50
}
51

            
52
/// A line and column position in a reader.
///
/// Both `line` and `column` are 1-based.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct Pos {
    /// Line index (1-based).
    pub line: usize,
    /// Column index (1-based).
    pub column: usize,
}

impl Pos {
    /// Creates a new position from a `line` and a `column`.
    pub fn new(line: usize, column: usize) -> Pos {
        Self { line, column }
    }
}
67

            
68
/// A position in a text buffer.
69
///
70
/// The position has two components: a char `offset` in the internal buffer of the reader, and
71
/// a column-row oriented position `pos`, used for human display. `pos` is usually initialized to
72
/// the first char of the buffer but it can also be set with a position inside another reader. This
73
/// allows the report of error of a sub-reader, relative to a parent reader.
74
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
75
pub struct Cursor {
76
    pub index: usize,
77
    pub pos: Pos,
78
}
79

            
80
impl Reader {
81
    /// Creates a new reader, position of the index is at the first char.
82
8405
    pub fn new(s: &str) -> Self {
83
8405
        Reader {
84
8405
            buf: s.chars().collect(),
85
8405
            cursor: Cursor {
86
8405
                index: 0,
87
8405
                pos: Pos { line: 1, column: 1 },
88
8405
            },
89
        }
90
    }
91

            
92
    /// Creates a new reader, `pos` is position of the index: this allow to report created
93
    /// structures and error to be referenced from this position.
94
    ///
95
    /// Note: the `buffer` offset is still initialized to 0.
96
1805
    pub fn with_pos(s: &str, pos: Pos) -> Self {
97
1805
        Reader {
98
1805
            buf: s.chars().collect(),
99
1805
            cursor: Cursor { index: 0, pos },
100
        }
101
    }
102

            
103
    /// Returns the current position of the read index.
104
21583170
    pub fn cursor(&self) -> Cursor {
105
21583170
        self.cursor
106
    }
107

            
108
    /// Position the read index to a new position.
109
8800190
    pub fn seek(&mut self, to: Cursor) {
110
8800190
        self.cursor = to;
111
    }
112

            
113
    /// Returns true if the reader has read all the buffer, false otherwise.
114
3241685
    pub fn is_eof(&self) -> bool {
115
3241685
        self.cursor.index == self.buf.len()
116
    }
117

            
118
    /// Returns the next char from the buffer advancing the internal state.
119
9323680
    pub fn read(&mut self) -> Option<char> {
120
9323680
        match self.buf.get(self.cursor.index) {
121
38890
            None => None,
122
9284790
            Some(c) => {
123
9284790
                self.cursor.index += 1;
124
9284790
                if !is_combining_character(*c) {
125
9284790
                    self.cursor.pos.column += 1;
126
                }
127
9284790
                if *c == '\n' {
128
712605
                    self.cursor.pos.column = 1;
129
712605
                    self.cursor.pos.line += 1;
130
                }
131
9284790
                Some(*c)
132
            }
133
        }
134
    }
135

            
136
    /// Returns `count` chars from the buffer advancing the internal state.
137
    /// This methods can returns less than `count` chars if there is not enough chars in the buffer.
138
315
    pub fn read_n(&mut self, count: usize) -> String {
139
315
        let mut s = String::new();
140
315
        for _ in 0..count {
141
6015
            match self.read() {
142
                None => {}
143
6015
                Some(c) => s.push(c),
144
            }
145
        }
146
315
        s
147
    }
148

            
149
    /// Returns chars from the buffer while `predicate` is true, advancing the internal state.
150
105565
    pub fn read_while(&mut self, predicate: fn(char) -> bool) -> String {
151
105565
        let mut s = String::new();
152
        loop {
153
379165
            match self.peek() {
154
7015
                None => return s,
155
372150
                Some(c) => {
156
372150
                    if predicate(c) {
157
273600
                        _ = self.read();
158
273600
                        s.push(c);
159
273600
                    } else {
160
98550
                        return s;
161
                    }
162
                }
163
            }
164
        }
165
    }
166

            
167
    /// Reads a string from a `start` position to the current position (excluded).
168
    ///
169
    /// This method doesn't modify the read index since we're reading "backwards" to the current
170
    /// read index.
171
393965
    pub fn read_from(&self, start: usize) -> String {
172
393965
        let end = self.cursor.index;
173
393965
        self.buf[start..end].iter().collect()
174
    }
175

            
176
    /// Peeks the next char from the buffer without advancing the internal state.
177
469950
    pub fn peek(&self) -> Option<char> {
178
469950
        self.buf.get(self.cursor.index).copied()
179
    }
180

            
181
    /// Peeks the next char that meet a `predicate`.
182
1715
    pub fn peek_if(&self, predicate: fn(char) -> bool) -> Option<char> {
183
1715
        let mut i = self.cursor.index;
184
        loop {
185
8565
            let &c = self.buf.get(i)?;
186
8565
            if predicate(c) {
187
1715
                return Some(c);
188
            }
189
6850
            i += 1;
190
        }
191
    }
192

            
193
    /// Peeks a string of `count` char without advancing the internal state.
194
    /// This methods can return less than `count` chars if there is not enough chars in the buffer.
195
125000
    pub fn peek_n(&self, count: usize) -> String {
196
125000
        let start = self.cursor.index;
197
125000
        let end = (start + count).min(self.buf.len());
198
125000
        self.buf[start..end].iter().collect()
199
    }
200
}
201

            
202
9284790
/// Returns true if `c` belongs to the Unicode "Combining Diacritical Marks" block
/// (U+0300–U+036F, both bounds included), false otherwise.
fn is_combining_character(c: char) -> bool {
    matches!(c, '\u{0300}'..='\u{036F}')
}
205

            
206
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads and peeks through a two-char buffer, checking the read index after each step.
    #[test]
    fn basic_reader() {
        let mut reader = Reader::new("hi");
        assert_eq!(reader.cursor().index, 0);
        assert!(!reader.is_eof());
        assert_eq!(reader.peek_n(2), "hi");
        assert_eq!(reader.cursor().index, 0);

        assert_eq!(reader.read(), Some('h'));
        assert_eq!(reader.cursor().index, 1);
        assert_eq!(reader.peek(), Some('i'));
        assert_eq!(reader.cursor().index, 1);
        assert_eq!(reader.read(), Some('i'));
        assert!(reader.is_eof());
        assert_eq!(reader.read(), None);
    }

    /// Reads "backwards" from a given index up to the current read index.
    #[test]
    fn peek_back() {
        let mut reader = Reader::new("abcdefgh");
        for expected in ['a', 'b', 'c', 'd', 'e'] {
            assert_eq!(reader.read(), Some(expected));
        }
        assert_eq!(reader.peek(), Some('f'));
        assert_eq!(reader.read_from(3), "de");
    }

    /// Reads digits until the end of the buffer, until a non-digit, and when there is none.
    #[test]
    fn read_while() {
        fn is_digit(c: char) -> bool {
            c.is_numeric()
        }

        let mut reader = Reader::new("123456789");
        assert_eq!(reader.read_while(is_digit), "123456789");
        assert_eq!(reader.cursor().index, 9);
        assert!(reader.is_eof());

        let mut reader = Reader::new("123456789abcde");
        assert_eq!(reader.read_while(is_digit), "123456789");
        assert_eq!(reader.cursor().index, 9);
        assert!(!reader.is_eof());

        let mut reader = Reader::new("abcde123456789");
        assert_eq!(reader.read_while(is_digit), "");
        assert_eq!(reader.cursor().index, 0);
    }

    /// A sub-reader created with `with_pos` starts at index 0 but keeps the parent's position.
    #[test]
    fn reader_create_with_from_pos() {
        let mut main_reader = Reader::new("aaabb");
        for _ in 0..3 {
            _ = main_reader.read();
        }

        let pos = main_reader.cursor().pos;
        let s = main_reader.read_while(|_| true);
        let mut sub_reader = Reader::with_pos(&s, pos);
        let expected = Cursor {
            index: 0,
            pos: Pos::new(1, 4),
        };
        assert_eq!(sub_reader.cursor(), expected);

        _ = sub_reader.read();
        let expected = Cursor {
            index: 1,
            pos: Pos::new(1, 5),
        };
        assert_eq!(sub_reader.cursor(), expected);
    }

    /// `peek_if` skips the chars that do not meet the predicate.
    #[test]
    fn peek_ignoring_whitespace() {
        fn is_not_whitespace(c: char) -> bool {
            !(c == ' ' || c == '\t')
        }

        let reader = Reader::new("\t\t\tabc");
        assert_eq!(reader.peek_if(is_not_whitespace), Some('a'));

        let reader = Reader::new("foo");
        assert_eq!(reader.peek_if(is_not_whitespace), Some('f'));
    }
}