/*
 * Hurl (https://hurl.dev)
 * Copyright (C) 2025 Orange
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *          http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
//! Represents a text reader.

use std::ops::{Add, AddAssign, Sub};
21

            
22
/// The `Reader` implements methods to read a stream of text. A reader manages
23
/// an internal `cursor` : it's the current read index position within the reader's internal buffer.
24
///
25
/// Methods like [`Reader::read`], [`Reader::read_while`] do advance the internal reader's `cursor`.
26
/// Other methods, like [`Reader::peek`], [`Reader::peek_n`] allows to get the next chars in the
27
/// buffer without modifying the current reader cursor.
28
///
29
/// The cursor is composed of an offset, which is always related to the reader internal buffer.
30
/// Along the buffer offset, a position [`Pos`] is updated each time a char is read. This position
31
/// corresponds to the column and row index in the buffer document. In most of the case, the
32
/// position is initialized to the first char, but a reader instance can be created using
33
/// [`Reader::with_pos`] to set a given started position. This can be useful when a reader
34
/// is instantiated as a "sub reader" of a given reader, and we want to report position relatively
35
/// to the main reader (for errors but also for constructed structures).
36
///
37
/// # Example
38
/// ```
39
///  use hurl_core::reader::{CharPos, Reader};
40
///
41
///  let mut reader = Reader::new("hi");
42
///  assert_eq!(reader.cursor().index, CharPos(0));
43
///  assert!(!reader.is_eof());
44
///  assert_eq!(reader.peek_n(2), "hi".to_string());
45
///  assert_eq!(reader.read(), Some('h'));
46
///  assert_eq!(reader.cursor().index, CharPos(1));
47
/// ```
48
#[derive(Clone, Debug, PartialEq, Eq)]
49
pub struct Reader {
50
    buf: Vec<char>,
51
    cursor: Cursor,
52
}
53

            
54
/// Represents a line and column position in a reader.
///
/// Indices are 1-based.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Pos {
    /// Line number, starting at 1.
    pub line: usize,
    /// Column number, starting at 1.
    pub column: usize,
}

impl Pos {
    /// Creates a new position from a `line` and a `column`.
    pub fn new(line: usize, column: usize) -> Pos {
        Pos { line, column }
    }
}

/// A character offset.
///
/// Because of multibyte UTF-8 characters, a byte offset is not equivalent to a character offset.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub struct CharPos(pub usize);

impl Sub for CharPos {
    type Output = CharPos;

    /// Returns the distance between two offsets.
    ///
    /// This is a plain `usize` subtraction: `rhs` must not be greater than `self`, otherwise the
    /// operation overflows (which panics in builds with overflow checks enabled).
    #[inline(always)]
    fn sub(self, rhs: Self) -> Self::Output {
        CharPos(self.0 - rhs.0)
    }
}

impl Add for CharPos {
    type Output = CharPos;

    /// Returns the offset located `rhs` chars after `self` (plain `usize` addition).
    #[inline(always)]
    fn add(self, rhs: Self) -> Self::Output {
        CharPos(self.0 + rhs.0)
    }
}

impl AddAssign for CharPos {
    /// Moves this offset forward by `rhs` chars (plain `usize` addition).
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        self.0 += rhs.0;
    }
}

/// A position in a text buffer.
102
///
103
/// The position has two components: a char `offset` in the internal buffer of the reader, and
104
/// a column-row oriented position `pos`, used for human display. `pos` is usually initialized to
105
/// the first char of the buffer but it can also be set with a position inside another reader. This
106
/// allows the report of error of a sub-reader, relative to a parent reader.
107
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
108
pub struct Cursor {
109
    pub index: CharPos,
110
    pub pos: Pos,
111
}
112

            
113
impl Reader {
114
    /// Creates a new reader, position of the index is at the first char.
115
10255
    pub fn new(s: &str) -> Self {
116
10255
        Reader {
117
10255
            buf: s.chars().collect(),
118
10255
            cursor: Cursor {
119
10255
                index: CharPos(0),
120
10255
                pos: Pos { line: 1, column: 1 },
121
10255
            },
122
        }
123
    }
124

            
125
    /// Creates a new reader, `pos` is position of the index: this allow to report created
126
    /// structures and error to be referenced from this position.
127
    ///
128
    /// Note: the `buffer` offset is still initialized to 0.
129
2655
    pub fn with_pos(s: &str, pos: Pos) -> Self {
130
2655
        Reader {
131
2655
            buf: s.chars().collect(),
132
2655
            cursor: Cursor {
133
2655
                index: CharPos(0),
134
2655
                pos,
135
2655
            },
136
        }
137
    }
138

            
139
    /// Returns the current position of the read index.
140
21515830
    pub fn cursor(&self) -> Cursor {
141
21515830
        self.cursor
142
    }
143

            
144
    /// Position the read index to a new position.
145
8389830
    pub fn seek(&mut self, to: Cursor) {
146
8389830
        self.cursor = to;
147
    }
148

            
149
    /// Returns true if the reader has read all the buffer, false otherwise.
150
2711740
    pub fn is_eof(&self) -> bool {
151
2711740
        self.cursor.index.0 == self.buf.len()
152
    }
153

            
154
    /// Returns the next char from the buffer advancing the internal state.
155
9219300
    pub fn read(&mut self) -> Option<char> {
156
9219300
        match self.buf.get(self.cursor.index.0) {
157
50500
            None => None,
158
9168800
            Some(c) => {
159
9168800
                self.cursor.index += CharPos(1);
160
9168800
                if !is_combining_character(*c) {
161
9168800
                    self.cursor.pos.column += 1;
162
                }
163
9168800
                if *c == '\n' {
164
723605
                    self.cursor.pos.column = 1;
165
723605
                    self.cursor.pos.line += 1;
166
                }
167
9168800
                Some(*c)
168
            }
169
        }
170
    }
171

            
172
    /// Returns `count` chars from the buffer advancing the internal state.
173
    /// This methods can returns less than `count` chars if there is not enough chars in the buffer.
174
345
    pub fn read_n(&mut self, count: CharPos) -> String {
175
345
        let mut s = String::new();
176
345
        for _ in 0..count.0 {
177
6925
            match self.read() {
178
                None => {}
179
6925
                Some(c) => s.push(c),
180
            }
181
        }
182
345
        s
183
    }
184

            
185
    /// Returns chars from the buffer while `predicate` is true, advancing the internal state.
186
127340
    pub fn read_while(&mut self, predicate: fn(char) -> bool) -> String {
187
127340
        let mut s = String::new();
188
        loop {
189
483350
            match self.peek() {
190
10270
                None => return s,
191
473080
                Some(c) => {
192
473080
                    if predicate(c) {
193
356010
                        _ = self.read();
194
356010
                        s.push(c);
195
356010
                    } else {
196
117070
                        return s;
197
                    }
198
                }
199
            }
200
        }
201
    }
202

            
203
    /// Reads a string from a `start` position to the current position (excluded).
204
    ///
205
    /// This method doesn't modify the read index since we're reading "backwards" to the current
206
    /// read index.
207
988870
    pub fn read_from(&self, start: CharPos) -> String {
208
988870
        let end = self.cursor.index;
209
988870
        self.buf[start.0..end.0].iter().collect()
210
    }
211

            
212
    /// Peeks the next char from the buffer without advancing the internal state.
213
598060
    pub fn peek(&self) -> Option<char> {
214
598060
        self.buf.get(self.cursor.index.0).copied()
215
    }
216

            
217
    /// Peeks the next char that meet a `predicate`.
218
2265
    pub fn peek_if(&self, predicate: fn(char) -> bool) -> Option<char> {
219
2265
        let mut i = self.cursor.index;
220
        loop {
221
10720
            let &c = self.buf.get(i.0)?;
222
10720
            if predicate(c) {
223
2265
                return Some(c);
224
            }
225
8455
            i += CharPos(1);
226
        }
227
    }
228

            
229
    /// Peeks a string of `count` char without advancing the internal state.
230
    /// This methods can return less than `count` chars if there is not enough chars in the buffer.
231
128485
    pub fn peek_n(&self, count: usize) -> String {
232
128485
        let start = self.cursor.index;
233
128485
        let end = (start + CharPos(count)).min(CharPos(self.buf.len()));
234
128485
        self.buf[start.0..end.0].iter().collect()
235
    }
236
}
237

            
238
9168800
/// Returns `true` if `c` belongs to the Combining Diacritical Marks block (U+0300–U+036F).
///
/// Both ends of the block are included: U+0300 (combining grave accent) and U+036F are
/// combining characters like the ones in between.
fn is_combining_character(c: char) -> bool {
    matches!(c, '\u{0300}'..='\u{036F}')
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Reading and peeking move (or don't move) the read index as expected.
    #[test]
    fn basic_reader() {
        let mut reader = Reader::new("hi");
        assert_eq!(reader.cursor().index, CharPos(0));
        assert!(!reader.is_eof());
        assert_eq!(reader.peek_n(2), "hi".to_string());
        assert_eq!(reader.cursor().index, CharPos(0));

        assert_eq!(reader.read().unwrap(), 'h');
        assert_eq!(reader.cursor().index, CharPos(1));
        assert_eq!(reader.peek().unwrap(), 'i');
        assert_eq!(reader.cursor().index, CharPos(1));
        assert_eq!(reader.read().unwrap(), 'i');
        assert!(reader.is_eof());
        assert_eq!(reader.read(), None);
    }

    /// `read_from` returns the chars between a previous index and the current one.
    #[test]
    fn peek_back() {
        let mut reader = Reader::new("abcdefgh");
        assert_eq!(reader.read(), Some('a'));
        assert_eq!(reader.read(), Some('b'));
        assert_eq!(reader.read(), Some('c'));
        assert_eq!(reader.read(), Some('d'));
        assert_eq!(reader.read(), Some('e'));
        assert_eq!(reader.peek(), Some('f'));
        assert_eq!(reader.read_from(CharPos(3)), "de");
    }

    /// `read_while` consumes chars up to the first one rejected by the predicate.
    #[test]
    fn read_while() {
        let mut reader = Reader::new("123456789");
        assert_eq!(reader.read_while(|c| c.is_numeric()), "123456789");
        assert_eq!(reader.cursor().index, CharPos(9));
        assert!(reader.is_eof());

        let mut reader = Reader::new("123456789abcde");
        assert_eq!(reader.read_while(|c| c.is_numeric()), "123456789");
        assert_eq!(reader.cursor().index, CharPos(9));
        assert!(!reader.is_eof());

        let mut reader = Reader::new("abcde123456789");
        assert_eq!(reader.read_while(|c| c.is_numeric()), "");
        assert_eq!(reader.cursor().index, CharPos(0));
    }

    /// `read_n` returns at most the chars left in the buffer.
    #[test]
    fn read_n_stops_at_end_of_buffer() {
        let mut reader = Reader::new("abc");
        assert_eq!(reader.read_n(CharPos(2)), "ab");
        assert_eq!(reader.cursor().index, CharPos(2));
        assert_eq!(reader.read_n(CharPos(5)), "c");
        assert!(reader.is_eof());
    }

    /// A newline resets the column and moves to the next line.
    #[test]
    fn position_tracks_lines_and_columns() {
        let mut reader = Reader::new("a\nb");
        _ = reader.read();
        assert_eq!(reader.cursor().pos, Pos::new(1, 2));
        _ = reader.read();
        assert_eq!(reader.cursor().pos, Pos::new(2, 1));
        _ = reader.read();
        assert_eq!(reader.cursor().pos, Pos::new(2, 2));
        assert!(reader.is_eof());
    }

    /// A sub reader created with `with_pos` reports positions relative to the main reader.
    #[test]
    fn reader_create_with_from_pos() {
        let mut main_reader = Reader::new("aaabb");
        _ = main_reader.read();
        _ = main_reader.read();
        _ = main_reader.read();

        let pos = main_reader.cursor().pos;
        let s = main_reader.read_while(|_| true);
        let mut sub_reader = Reader::with_pos(&s, pos);
        assert_eq!(
            sub_reader.cursor,
            Cursor {
                index: CharPos(0),
                pos: Pos::new(1, 4)
            }
        );

        _ = sub_reader.read();
        assert_eq!(
            sub_reader.cursor,
            Cursor {
                index: CharPos(1),
                pos: Pos::new(1, 5)
            }
        );
    }

    /// `peek_if` skips the chars rejected by the predicate without consuming anything.
    #[test]
    fn peek_ignoring_whitespace() {
        fn is_whitespace(c: char) -> bool {
            c == ' ' || c == '\t'
        }
        let reader = Reader::new("\t\t\tabc");
        assert_eq!(reader.peek_if(|c| !is_whitespace(c)), Some('a'));

        let reader = Reader::new("foo");
        assert_eq!(reader.peek_if(|c| !is_whitespace(c)), Some('f'));
    }
}