1
/*
 * Hurl (https://hurl.dev)
 * Copyright (C) 2026 Orange
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *          http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
//! Represents a text reader.
19

            
20
use std::ops::{Add, AddAssign, Sub};
21

            
22
/// The `Reader` implements methods to read a stream of text. A reader manages
23
/// an internal `cursor` : it's the current read index position within the reader's internal buffer.
24
///
25
/// Methods like [`Reader::read`], [`Reader::read_while`] do advance the internal reader's `cursor`.
26
/// Other methods, like [`Reader::peek`], [`Reader::peek_n`] allows to get the next chars in the
27
/// buffer without modifying the current reader cursor.
28
///
29
/// The cursor is composed of an offset, which is always related to the reader internal buffer.
30
/// Along the buffer offset, a position [`Pos`] is updated each time a char is read. This position
31
/// corresponds to the column and row index in the buffer document. In most of the case, the
32
/// position is initialized to the first char, but a reader instance can be created using
33
/// [`Reader::with_pos`] to set a given started position. This can be useful when a reader
34
/// is instantiated as a "sub reader" of a given reader, and we want to report position relatively
35
/// to the main reader (for errors but also for constructed structures).
36
///
37
/// # Example
38
/// ```
39
///  use hurl_core::reader::{CharPos, Reader};
40
///
41
///  let mut reader = Reader::new("hi");
42
///  assert_eq!(reader.cursor().index, CharPos(0));
43
///  assert!(!reader.is_eof());
44
///  assert_eq!(reader.peek_n(2), "hi".to_string());
45
///  assert_eq!(reader.read(), Some('h'));
46
///  assert_eq!(reader.cursor().index, CharPos(1));
47
/// ```
48
#[derive(Clone, Debug, PartialEq, Eq)]
49
pub struct Reader {
50
    buf: Vec<char>,
51
    cursor: Cursor,
52
}
53

            
54
/// Represents a line and column position in a reader.
///
/// Indices are 1-based.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Pos {
    /// Line number (1-based).
    pub line: usize,
    /// Column number (1-based).
    pub column: usize,
}

impl Pos {
    /// Creates a new position from a `line` and a `column`.
    ///
    /// No validation is done on the values: they are stored as given.
    pub fn new(line: usize, column: usize) -> Pos {
        Pos { line, column }
    }
}
69

            
70
/// A character offset.
///
/// Because of multibyte UTF-8 characters, a byte offset is not equivalent to a character offset.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub struct CharPos(pub usize);

impl Sub for CharPos {
    type Output = CharPos;

    /// Returns the distance between two offsets.
    ///
    /// This is a plain `usize` subtraction: `rhs` is expected to be lower than or equal to
    /// `self`.
    #[inline(always)]
    fn sub(self, rhs: Self) -> Self::Output {
        CharPos(self.0 - rhs.0)
    }
}

impl Add for CharPos {
    type Output = CharPos;

    /// Returns the offset obtained by adding `rhs` chars to `self`.
    #[inline(always)]
    fn add(self, rhs: Self) -> Self::Output {
        CharPos(self.0 + rhs.0)
    }
}

impl AddAssign for CharPos {
    /// Advances this offset by `rhs` chars, in place.
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        self.0 += rhs.0;
    }
}
100

            
101
/// A position in a text buffer.
102
///
103
/// The position has two components: a char `offset` in the internal buffer of the reader, and
104
/// a column-row oriented position `pos`, used for human display. `pos` is usually initialized to
105
/// the first char of the buffer but it can also be set with a position inside another reader. This
106
/// allows the report of error of a sub-reader, relative to a parent reader.
107
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
108
pub struct Cursor {
109
    pub index: CharPos,
110
    pub pos: Pos,
111
}
112

            
113
impl Reader {
114
    /// Creates a new reader, position of the index is at the first char.
115
12080
    pub fn new(s: &str) -> Self {
116
12080
        Reader {
117
12080
            buf: s.chars().collect(),
118
12080
            cursor: Cursor {
119
12080
                index: CharPos(0),
120
12080
                pos: Pos { line: 1, column: 1 },
121
12080
            },
122
        }
123
    }
124

            
125
    /// Creates a new reader, `pos` is position of the index: this allow to report created
126
    /// structures and error to be referenced from this position.
127
    ///
128
    /// Note: the `buffer` offset is still initialized to 0.
129
2860
    pub fn with_pos(s: &str, pos: Pos) -> Self {
130
2860
        Reader {
131
2860
            buf: s.chars().collect(),
132
2860
            cursor: Cursor {
133
2860
                index: CharPos(0),
134
2860
                pos,
135
2860
            },
136
        }
137
    }
138

            
139
    /// Returns the current position of the read index.
140
24765965
    pub fn cursor(&self) -> Cursor {
141
24765965
        self.cursor
142
    }
143

            
144
    /// Position the read index to a new position.
145
9816785
    pub fn seek(&mut self, to: Cursor) {
146
9816785
        self.cursor = to;
147
    }
148

            
149
    /// Returns true if the reader has read all the buffer, false otherwise.
150
2970115
    pub fn is_eof(&self) -> bool {
151
2970115
        self.cursor.index.0 == self.buf.len()
152
    }
153

            
154
    /// Returns the next char from the buffer advancing the internal state.
155
10524020
    pub fn read(&mut self) -> Option<char> {
156
10524020
        match self.buf.get(self.cursor.index.0) {
157
68385
            None => None,
158
10455635
            Some(c) => {
159
10455635
                self.cursor.index += CharPos(1);
160
10455635
                if !is_combining_character(*c) {
161
10455635
                    self.cursor.pos.column += 1;
162
                }
163
10455635
                if *c == '\n' {
164
801450
                    self.cursor.pos.column = 1;
165
801450
                    self.cursor.pos.line += 1;
166
                }
167
10455635
                Some(*c)
168
            }
169
        }
170
    }
171

            
172
    /// Returns `count` chars from the buffer advancing the internal state.
173
    /// This methods can returns less than `count` chars if there is not enough chars in the buffer.
174
400
    pub fn read_n(&mut self, count: CharPos) -> String {
175
400
        let mut s = String::new();
176
400
        for _ in 0..count.0 {
177
7185
            match self.read() {
178
                None => {}
179
7185
                Some(c) => s.push(c),
180
            }
181
        }
182
400
        s
183
    }
184

            
185
    /// Returns chars from the buffer while `predicate` is true, advancing the internal state.
186
139810
    pub fn read_while(&mut self, predicate: fn(char) -> bool) -> String {
187
139810
        let mut s = String::new();
188
        loop {
189
490470
            match self.peek() {
190
484290
                Some(c) if predicate(c) => {
191
350660
                    _ = self.read();
192
350660
                    s.push(c);
193
                }
194
139810
                _ => return s,
195
            }
196
        }
197
    }
198

            
199
    /// Reads a string from a `start` position to the current position (excluded).
200
    ///
201
    /// This method doesn't modify the read index since we're reading "backwards" to the current
202
    /// read index.
203
1134165
    pub fn read_from(&self, start: CharPos) -> String {
204
1134165
        let end = self.cursor.index;
205
1134165
        self.buf[start.0..end.0].iter().collect()
206
    }
207

            
208
    /// Peeks the next char from the buffer without advancing the internal state.
209
640675
    pub fn peek(&self) -> Option<char> {
210
640675
        self.buf.get(self.cursor.index.0).copied()
211
    }
212

            
213
    /// Peeks the next char that meet a `predicate`.
214
3030
    pub fn peek_if(&self, predicate: fn(char) -> bool) -> Option<char> {
215
3030
        let mut i = self.cursor.index;
216
        loop {
217
14620
            let &c = self.buf.get(i.0)?;
218
14620
            if predicate(c) {
219
3030
                return Some(c);
220
            }
221
11590
            i += CharPos(1);
222
        }
223
    }
224

            
225
    /// Peeks a string of `count` char without advancing the internal state.
226
    /// This methods can return less than `count` chars if there is not enough chars in the buffer.
227
131545
    pub fn peek_n(&self, count: usize) -> String {
228
131545
        let start = self.cursor.index;
229
131545
        let end = (start + CharPos(count)).min(CharPos(self.buf.len()));
230
131545
        self.buf[start.0..end.0].iter().collect()
231
    }
232
}
233

            
234
10455635
/// Returns `true` if `c` belongs to the Unicode "Combining Diacritical Marks" block
/// (U+0300 to U+036F, both bounds included).
///
/// The reader uses this to decide whether reading a char advances the column.
fn is_combining_character(c: char) -> bool {
    matches!(c, '\u{0300}'..='\u{036F}')
}
237

            
238
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn basic_reader() {
        let mut reader = Reader::new("hi");
        assert_eq!(reader.cursor().index, CharPos(0));
        assert!(!reader.is_eof());
        assert_eq!(reader.peek_n(2), "hi".to_string());
        assert_eq!(reader.cursor().index, CharPos(0));

        assert_eq!(reader.read().unwrap(), 'h');
        assert_eq!(reader.cursor().index, CharPos(1));
        assert_eq!(reader.peek().unwrap(), 'i');
        assert_eq!(reader.cursor().index, CharPos(1));
        assert_eq!(reader.read().unwrap(), 'i');
        assert!(reader.is_eof());
        assert_eq!(reader.read(), None);
    }

    #[test]
    fn peek_back() {
        let mut reader = Reader::new("abcdefgh");
        assert_eq!(reader.read(), Some('a'));
        assert_eq!(reader.read(), Some('b'));
        assert_eq!(reader.read(), Some('c'));
        assert_eq!(reader.read(), Some('d'));
        assert_eq!(reader.read(), Some('e'));
        assert_eq!(reader.peek(), Some('f'));
        assert_eq!(reader.read_from(CharPos(3)), "de");
    }

    #[test]
    fn read_while() {
        let mut reader = Reader::new("123456789");
        assert_eq!(reader.read_while(|c| c.is_numeric()), "123456789");
        assert_eq!(reader.cursor().index, CharPos(9));
        assert!(reader.is_eof());

        let mut reader = Reader::new("123456789abcde");
        assert_eq!(reader.read_while(|c| c.is_numeric()), "123456789");
        assert_eq!(reader.cursor().index, CharPos(9));
        assert!(!reader.is_eof());

        let mut reader = Reader::new("abcde123456789");
        assert_eq!(reader.read_while(|c| c.is_numeric()), "");
        assert_eq!(reader.cursor().index, CharPos(0));
    }

    #[test]
    fn reader_create_with_from_pos() {
        let mut main_reader = Reader::new("aaabb");
        _ = main_reader.read();
        _ = main_reader.read();
        _ = main_reader.read();

        let pos = main_reader.cursor().pos;
        let s = main_reader.read_while(|_| true);
        let mut sub_reader = Reader::with_pos(&s, pos);
        assert_eq!(
            sub_reader.cursor,
            Cursor {
                index: CharPos(0),
                pos: Pos::new(1, 4)
            }
        );

        _ = sub_reader.read();
        assert_eq!(
            sub_reader.cursor,
            Cursor {
                index: CharPos(1),
                pos: Pos::new(1, 5)
            }
        );
    }

    #[test]
    fn peek_ignoring_whitespace() {
        fn is_whitespace(c: char) -> bool {
            c == ' ' || c == '\t'
        }
        let reader = Reader::new("\t\t\tabc");
        assert_eq!(reader.peek_if(|c| !is_whitespace(c)), Some('a'));

        let reader = Reader::new("foo");
        assert_eq!(reader.peek_if(|c| !is_whitespace(c)), Some('f'));
    }

    // A newline moves the position to the first column of the next line.
    #[test]
    fn read_tracks_line_and_column() {
        let mut reader = Reader::new("a\nb");
        assert_eq!(reader.cursor().pos, Pos::new(1, 1));
        assert_eq!(reader.read(), Some('a'));
        assert_eq!(reader.cursor().pos, Pos::new(1, 2));
        assert_eq!(reader.read(), Some('\n'));
        assert_eq!(reader.cursor().pos, Pos::new(2, 1));
        assert_eq!(reader.read(), Some('b'));
        assert_eq!(reader.cursor().pos, Pos::new(2, 2));
        assert!(reader.is_eof());
    }

    // Asking for more chars than available returns what is left in the buffer.
    #[test]
    fn read_n_and_peek_n_stop_at_end_of_buffer() {
        let mut reader = Reader::new("abc");
        assert_eq!(reader.peek_n(10), "abc");
        assert_eq!(reader.read_n(CharPos(2)), "ab");
        assert_eq!(reader.read_n(CharPos(10)), "c");
        assert!(reader.is_eof());
        assert_eq!(reader.peek_n(1), "");
        assert_eq!(reader.read_n(CharPos(1)), "");
    }
}