1
/*
2
 * Hurl (https://hurl.dev)
3
 * Copyright (C) 2024 Orange
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 *          http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 */
18
use crate::ast::*;
19
use crate::combinator::one_or_more;
20
use crate::parser::error::*;
21
use crate::parser::primitives::*;
22
use crate::parser::{template, ParseResult};
23
use crate::reader::Reader;
24

            
25
/// Steps:
26
/// 1- parse String until end of stream, end of line
27
///    the string does not contain trailing space
28
/// 2- templatize
29
5090
pub fn unquoted_template(reader: &mut Reader) -> ParseResult<Template> {
30
5090
    let start = reader.cursor();
31
5090
    let mut chars = vec![];
32
5090
    let mut spaces = vec![];
33
5090
    let mut end = start;
34
    loop {
35
60010
        let pos = reader.cursor().pos;
36
60010
        match any_char(&['#'], reader) {
37
5090
            Err(e) => {
38
5090
                if e.recoverable {
39
5090
                    break;
40
                } else {
41
                    return Err(e);
42
                }
43
            }
44
54920
            Ok((c, s)) => {
45
54920
                if s == "\n" {
46
                    break;
47
                }
48
54920
                if s == " " {
49
1615
                    spaces.push((c, s, pos));
50
1615
                } else {
51
53305
                    if !spaces.is_empty() {
52
1090
                        chars.append(&mut spaces);
53
1090
                        spaces = vec![];
54
                    }
55
53305
                    chars.push((c, s, pos));
56
53305
                    end = reader.cursor();
57
                }
58
            }
59
        }
60
    }
61
5090
    reader.seek(end);
62
5090
    let encoded_string = template::EncodedString {
63
5090
        source_info: SourceInfo::new(start.pos, end.pos),
64
5090
        chars,
65
5090
    };
66
5090
    let elements = template::templatize(encoded_string)?;
67
5090
    Ok(Template {
68
5090
        delimiter: None,
69
5090
        elements,
70
5090
        source_info: SourceInfo::new(start.pos, end.pos),
71
5090
    })
72
}
73

            
74
// TODO: should return an EncodedString
75
// (decoding escape sequence)
76
550
pub fn quoted_oneline_string(reader: &mut Reader) -> ParseResult<String> {
77
550
    literal("\"", reader)?;
78
7155
    let s = reader.read_while(|c| c != '"' && c != '\n');
79
550
    literal("\"", reader)?;
80
545
    Ok(s)
81
}
82

            
83
22335
pub fn quoted_template(reader: &mut Reader) -> ParseResult<Template> {
84
22335
    let start = reader.cursor();
85
22335
    let mut end = start;
86
22335
    try_literal("\"", reader)?;
87
21495
    let mut chars = vec![];
88
    loop {
89
279780
        let pos = reader.cursor().pos;
90
279780
        let save = reader.cursor();
91
279780
        match any_char(&['"'], reader) {
92
21495
            Err(e) => {
93
21495
                if e.recoverable {
94
21490
                    reader.seek(save);
95
21490
                    break;
96
                } else {
97
5
                    return Err(e);
98
                }
99
            }
100
258285
            Ok((c, s)) => {
101
258285
                chars.push((c, s, pos));
102
258285
                end = reader.cursor();
103
            }
104
        }
105
    }
106
21490
    literal("\"", reader)?;
107
21485
    let encoded_string = template::EncodedString {
108
21485
        source_info: SourceInfo::new(start.pos, end.pos),
109
21485
        chars,
110
21485
    };
111
21485
    let elements = template::templatize(encoded_string)?;
112
21485
    Ok(Template {
113
21485
        delimiter: Some('"'),
114
21485
        elements,
115
21485
        source_info: SourceInfo::new(start.pos, reader.cursor().pos),
116
21485
    })
117
}
118

            
119
24745
pub fn backtick_template(reader: &mut Reader) -> ParseResult<Template> {
120
24745
    let delimiter = Some('`');
121
24745
    let start = reader.cursor();
122
24745
    let mut end = start;
123
24745
    try_literal("`", reader)?;
124
1485
    let mut chars = vec![];
125
    loop {
126
29650
        let pos = reader.cursor().pos;
127
29650
        let save = reader.cursor();
128
29650
        match any_char(&['`', '\n'], reader) {
129
1485
            Err(e) => {
130
1485
                if e.recoverable {
131
1485
                    reader.seek(save);
132
1485
                    break;
133
                } else {
134
                    return Err(e);
135
                }
136
            }
137
28165
            Ok((c, s)) => {
138
28165
                chars.push((c, s, pos));
139
28165
                end = reader.cursor();
140
            }
141
        }
142
    }
143
1485
    literal("`", reader)?;
144
1485
    let encoded_string = template::EncodedString {
145
1485
        source_info: SourceInfo::new(start.pos, end.pos),
146
1485
        chars,
147
1485
    };
148
1485
    let elements = template::templatize(encoded_string)?;
149
1485
    Ok(Template {
150
1485
        delimiter,
151
1485
        elements,
152
1485
        source_info: SourceInfo::new(start.pos, reader.cursor().pos),
153
1485
    })
154
}
155

            
156
369440
fn any_char(except: &[char], reader: &mut Reader) -> ParseResult<(char, String)> {
157
369440
    let start = reader.cursor();
158
369440
    match escape_char(reader) {
159
2030
        Ok(c) => Ok((c, reader.read_from(start.index))),
160
367410
        Err(e) => {
161
367410
            if e.recoverable {
162
367405
                reader.seek(start);
163
367405
                match reader.read() {
164
                    None => {
165
545
                        let kind = ParseErrorKind::Expecting {
166
545
                            value: "char".to_string(),
167
545
                        };
168
545
                        Err(ParseError::new(start.pos, true, kind))
169
                    }
170
366860
                    Some(c) => {
171
366860
                        if except.contains(&c)
172
343715
                            || ['\\', '\x08', '\n', '\x0c', '\r', '\t'].contains(&c)
173
                        {
174
27520
                            let kind = ParseErrorKind::Expecting {
175
27520
                                value: "char".to_string(),
176
27520
                            };
177
27520
                            Err(ParseError::new(start.pos, true, kind))
178
                        } else {
179
339340
                            Ok((c, reader.read_from(start.index)))
180
                        }
181
                    }
182
                }
183
            } else {
184
5
                Err(e)
185
            }
186
        }
187
    }
188
}
189

            
190
370175
pub fn escape_char(reader: &mut Reader) -> ParseResult<char> {
191
370175
    try_literal("\\", reader)?;
192
2075
    let start = reader.cursor();
193
2075
    match reader.read() {
194
35
        Some('#') => Ok('#'),
195
530
        Some('"') => Ok('"'),
196
        Some('`') => Ok('`'),
197
355
        Some('\\') => Ok('\\'),
198
        Some('/') => Ok('/'),
199
        Some('b') => Ok('\x08'),
200
970
        Some('n') => Ok('\n'),
201
        Some('f') => Ok('\x0c'),
202
20
        Some('r') => Ok('\r'),
203
35
        Some('t') => Ok('\t'),
204
125
        Some('u') => unicode(reader),
205
5
        _ => Err(ParseError::new(
206
5
            start.pos,
207
5
            false,
208
5
            ParseErrorKind::EscapeChar,
209
5
        )),
210
    }
211
}
212

            
213
205
pub(crate) fn unicode(reader: &mut Reader) -> ParseResult<char> {
214
205
    literal("{", reader)?;
215
205
    let v = hex_value(reader)?;
216
205
    let c = match std::char::from_u32(v) {
217
        None => {
218
            return Err(ParseError::new(
219
                reader.cursor().pos,
220
                false,
221
                ParseErrorKind::Unicode,
222
            ))
223
        }
224
205
        Some(c) => c,
225
205
    };
226
205
    literal("}", reader)?;
227
205
    Ok(c)
228
}
229

            
230
205
fn hex_value(reader: &mut Reader) -> ParseResult<u32> {
231
205
    let mut digits = one_or_more(hex_digit, reader)?;
232
205
    let mut v = 0;
233
205
    let mut weight = 1;
234
205
    digits.reverse();
235
565
    for d in digits.iter() {
236
565
        v += weight * d;
237
565
        weight *= 16;
238
    }
239
205
    Ok(v)
240
}
241

            
242
#[cfg(test)]
mod tests {
    use std::time::Instant;

    use super::*;
    use crate::reader::Pos;

    /// An empty input yields an empty template and consumes nothing.
    #[test]
    fn test_unquoted_template_empty() {
        let mut reader = Reader::new("");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 1)),
            }
        );
        assert_eq!(reader.cursor().index, 0);
    }

    /// An unescaped `#` ends an unquoted template.
    #[test]
    fn test_unquoted_template_with_hash() {
        let mut reader = Reader::new("a#");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![TemplateElement::String {
                    value: "a".to_string(),
                    encoded: "a".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 2)),
            }
        );
        assert_eq!(reader.cursor().index, 1);
    }

    /// A `#` written as a unicode escape is part of the template.
    #[test]
    fn test_unquoted_template_with_encoded_hash() {
        let mut reader = Reader::new("a\\u{23}");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![TemplateElement::String {
                    value: "a#".to_string(),
                    encoded: "a\\u{23}".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 8)),
            }
        );
        assert_eq!(reader.cursor().index, 7);
    }

    /// Double quotes have no special meaning in an unquoted template.
    #[test]
    fn test_unquoted_template_with_quote() {
        let mut reader = Reader::new("\"hi\"");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![TemplateElement::String {
                    value: "\"hi\"".to_string(),
                    encoded: "\"hi\"".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 5)),
            }
        );
        assert_eq!(reader.cursor().index, 4);
    }

    #[test]
    fn test_unquoted_template_hello_world() {
        let mut reader = Reader::new("hello\\u{20}{{name}}!");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![
                    TemplateElement::String {
                        value: "hello ".to_string(),
                        encoded: "hello\\u{20}".to_string(),
                    },
                    TemplateElement::Expression(Expr {
                        space0: Whitespace {
                            value: String::new(),
                            source_info: SourceInfo::new(Pos::new(1, 14), Pos::new(1, 14)),
                        },
                        variable: Variable {
                            name: "name".to_string(),
                            source_info: SourceInfo::new(Pos::new(1, 14), Pos::new(1, 18)),
                        },
                        space1: Whitespace {
                            value: String::new(),
                            source_info: SourceInfo::new(Pos::new(1, 18), Pos::new(1, 18)),
                        },
                    }),
                    TemplateElement::String {
                        value: "!".to_string(),
                        encoded: "!".to_string(),
                    },
                ],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 21)),
            }
        );
        assert_eq!(reader.cursor().index, 20);
    }

    #[test]
    fn test_quoted_template() {
        let mut reader = Reader::new("\"\"");
        assert_eq!(
            quoted_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('"'),
                elements: vec![],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 3)),
            }
        );
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("\"a#\"");
        assert_eq!(
            quoted_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('"'),
                elements: vec![TemplateElement::String {
                    value: "a#".to_string(),
                    encoded: "a#".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 5)),
            }
        );
        assert_eq!(reader.cursor().index, 4);

        let mut reader = Reader::new("\"{0}\"");
        assert_eq!(
            quoted_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('"'),
                elements: vec![TemplateElement::String {
                    value: "{0}".to_string(),
                    encoded: "{0}".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 6)),
            }
        );
        assert_eq!(reader.cursor().index, 5);
    }

    #[test]
    fn test_quoted_template_with_quote() {
        // "\"hi\""
        let mut reader = Reader::new("\"\\\"hi\\\"\"");
        assert_eq!(
            quoted_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('"'),
                elements: vec![TemplateElement::String {
                    value: "\"hi\"".to_string(),
                    encoded: "\\\"hi\\\"".to_string()
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 9)),
            }
        );
        assert_eq!(reader.cursor().index, 8);
    }

    #[test]
    fn test_quoted_template_error_missing_closing_quote() {
        let mut reader = Reader::new("\"not found");
        let error = quoted_template(&mut reader).err().unwrap();
        assert_eq!(
            error.pos,
            Pos {
                line: 1,
                column: 11
            }
        );
        assert!(!error.recoverable);
    }

    #[test]
    fn test_quoted_string() {
        let mut reader = Reader::new("\"\"");
        assert_eq!(quoted_oneline_string(&mut reader).unwrap(), "");
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("\"Hello\"");
        assert_eq!(quoted_oneline_string(&mut reader).unwrap(), "Hello");
        assert_eq!(reader.cursor().index, 7);
    }

    #[test]
    fn test_backtick_template() {
        let mut reader = Reader::new("``");
        assert_eq!(
            backtick_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('`'),
                elements: vec![],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 3)),
            }
        );
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("`foo#`");
        assert_eq!(
            backtick_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('`'),
                elements: vec![TemplateElement::String {
                    value: "foo#".to_string(),
                    encoded: "foo#".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 7)),
            }
        );
        assert_eq!(reader.cursor().index, 6);

        let mut reader = Reader::new("`{0}`");
        assert_eq!(
            backtick_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('`'),
                elements: vec![TemplateElement::String {
                    value: "{0}".to_string(),
                    encoded: "{0}".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 6)),
            }
        );
        assert_eq!(reader.cursor().index, 5);
    }

    #[test]
    fn test_backtick_template_with_backtick() {
        // `\`hi\``
        let mut reader = Reader::new("`\\`hi\\``");
        assert_eq!(
            backtick_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('`'),
                elements: vec![TemplateElement::String {
                    value: "`hi`".to_string(),
                    encoded: "\\`hi\\`".to_string()
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 9)),
            }
        );
        assert_eq!(reader.cursor().index, 8);
    }

    #[test]
    fn test_backtick_template_error_missing_closing_backtick() {
        let mut reader = Reader::new("`not found");
        let error = backtick_template(&mut reader).err().unwrap();
        assert_eq!(
            error.pos,
            Pos {
                line: 1,
                column: 11
            }
        );
        assert!(!error.recoverable);
    }

    #[test]
    fn test_any_char() {
        let mut reader = Reader::new("a");
        assert_eq!(any_char(&[], &mut reader).unwrap(), ('a', "a".to_string()));
        assert_eq!(reader.cursor().index, 1);

        let mut reader = Reader::new(" ");
        assert_eq!(any_char(&[], &mut reader).unwrap(), (' ', " ".to_string()));
        assert_eq!(reader.cursor().index, 1);

        let mut reader = Reader::new("\\t");
        assert_eq!(
            any_char(&[], &mut reader).unwrap(),
            ('\t', "\\t".to_string())
        );
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("#");
        assert_eq!(any_char(&[], &mut reader).unwrap(), ('#', "#".to_string()));
        assert_eq!(reader.cursor().index, 1);
    }

    #[test]
    fn test_any_char_quote() {
        let mut reader = Reader::new("\\\"");
        assert_eq!(
            any_char(&[], &mut reader).unwrap(),
            ('"', "\\\"".to_string())
        );
        assert_eq!(reader.cursor().index, 2);
    }

    #[test]
    fn test_any_char_error() {
        let mut reader = Reader::new("");
        let error = any_char(&[], &mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert!(error.recoverable);

        let mut reader = Reader::new("#");
        let error = any_char(&['#'], &mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert!(error.recoverable);

        let mut reader = Reader::new("\t");
        let error = any_char(&[], &mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert!(error.recoverable);
    }

    #[test]
    fn test_escape_char() {
        let mut reader = Reader::new("\\n");
        assert_eq!(escape_char(&mut reader).unwrap(), '\n');
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("\\u{0a}");
        assert_eq!(escape_char(&mut reader).unwrap(), '\n');
        assert_eq!(reader.cursor().index, 6);

        let mut reader = Reader::new("x");
        let error = escape_char(&mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert_eq!(
            error.kind,
            ParseErrorKind::Expecting {
                value: "\\".to_string()
            }
        );
        assert!(error.recoverable);
        assert_eq!(reader.cursor().index, 0);
    }

    #[test]
    fn test_unicode() {
        let mut reader = Reader::new("{000a}");
        assert_eq!(unicode(&mut reader).unwrap(), '\n');
        assert_eq!(reader.cursor().index, 6);

        let mut reader = Reader::new("{E9}");
        assert_eq!(unicode(&mut reader).unwrap(), 'é');
        assert_eq!(reader.cursor().index, 4);
    }

    #[test]
    fn test_hex_value() {
        let mut reader = Reader::new("20x");
        assert_eq!(hex_value(&mut reader).unwrap(), 32);

        let mut reader = Reader::new("x");
        let error = hex_value(&mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert_eq!(error.kind, ParseErrorKind::HexDigit);
        assert!(!error.recoverable);
    }

    #[test]
    fn test_quoted_template_benchmark() {
        // benchmark tests not in stable toolchain yet
        // Simply log duration for the time-being
        let input = format!("\"Hello World!\"{}", "X".repeat(10_000_000));
        let mut reader = Reader::new(input.as_str());

        // `Instant` is monotonic, so measuring the elapsed time cannot fail
        // (unlike `SystemTime`, which errors out if the clock steps back).
        let now = Instant::now();
        assert!(quoted_template(&mut reader).is_ok());
        assert_eq!(reader.cursor().index, 14);
        eprintln!("duration= {}", now.elapsed().as_nanos());
    }
}