1
/*
2
 * Hurl (https://hurl.dev)
3
 * Copyright (C) 2024 Orange
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 *          http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 */
18
use crate::ast::*;
19
use crate::combinator::one_or_more;
20
use crate::parser::error::*;
21
use crate::parser::primitives::*;
22
use crate::parser::{template, ParseResult};
23
use crate::reader::Reader;
24

            
25
/// Steps:
26
/// 1- parse String until end of stream, end of line
27
///    the string does not contain trailing space
28
/// 2- templatize
29
19440
pub fn unquoted_template(reader: &mut Reader) -> ParseResult<Template> {
30
19440
    let start = reader.cursor();
31
19440
    let mut chars = vec![];
32
19440
    let mut spaces = vec![];
33
19440
    let mut end = start;
34
    loop {
35
551800
        let pos = reader.cursor().pos;
36
551800
        match any_char(&['#'], reader) {
37
19440
            Err(e) => {
38
19440
                if e.recoverable {
39
19440
                    break;
40
                } else {
41
                    return Err(e);
42
                }
43
            }
44
532360
            Ok((c, s)) => {
45
532360
                if s == "\n" {
46
                    break;
47
                }
48
532360
                if s == " " {
49
1640
                    spaces.push((c, s, pos));
50
1640
                } else {
51
530720
                    if !spaces.is_empty() {
52
1100
                        chars.append(&mut spaces);
53
1100
                        spaces = vec![];
54
                    }
55
530720
                    chars.push((c, s, pos));
56
530720
                    end = reader.cursor();
57
                }
58
            }
59
        }
60
    }
61
19440
    reader.seek(end);
62
19440
    let encoded_string = template::EncodedString {
63
19440
        source_info: SourceInfo::new(start.pos, end.pos),
64
19440
        chars,
65
19440
    };
66
19440
    let elements = template::templatize(encoded_string)?;
67
19435
    Ok(Template {
68
19435
        delimiter: None,
69
19435
        elements,
70
19435
        source_info: SourceInfo::new(start.pos, end.pos),
71
19435
    })
72
}
73

            
74
// TODO: should return an EncodedString
75
// (decoding escape sequence)
76
640
pub fn quoted_oneline_string(reader: &mut Reader) -> ParseResult<String> {
77
640
    literal("\"", reader)?;
78
8853
    let s = reader.read_while(|c| c != '"' && c != '\n');
79
640
    literal("\"", reader)?;
80
635
    Ok(s)
81
}
82

            
83
22870
pub fn quoted_template(reader: &mut Reader) -> ParseResult<Template> {
84
22870
    let start = reader.cursor();
85
22870
    let mut end = start;
86
22870
    try_literal("\"", reader)?;
87
21950
    let mut chars = vec![];
88
    loop {
89
292890
        let pos = reader.cursor().pos;
90
292890
        let save = reader.cursor();
91
292890
        match any_char(&['"'], reader) {
92
21950
            Err(e) => {
93
21950
                if e.recoverable {
94
21945
                    reader.seek(save);
95
21945
                    break;
96
                } else {
97
5
                    return Err(e);
98
                }
99
            }
100
270940
            Ok((c, s)) => {
101
270940
                chars.push((c, s, pos));
102
270940
                end = reader.cursor();
103
            }
104
        }
105
    }
106
21945
    literal("\"", reader)?;
107
21940
    let encoded_string = template::EncodedString {
108
21940
        source_info: SourceInfo::new(start.pos, end.pos),
109
21940
        chars,
110
21940
    };
111
21940
    let elements = template::templatize(encoded_string)?;
112
21940
    Ok(Template {
113
21940
        delimiter: Some('"'),
114
21940
        elements,
115
21940
        source_info: SourceInfo::new(start.pos, reader.cursor().pos),
116
21940
    })
117
}
118

            
119
26555
pub fn backtick_template(reader: &mut Reader) -> ParseResult<Template> {
120
26555
    let delimiter = Some('`');
121
26555
    let start = reader.cursor();
122
26555
    let mut end = start;
123
26555
    try_literal("`", reader)?;
124
1515
    let mut chars = vec![];
125
    loop {
126
30000
        let pos = reader.cursor().pos;
127
30000
        let save = reader.cursor();
128
30000
        match any_char(&['`', '\n'], reader) {
129
1515
            Err(e) => {
130
1515
                if e.recoverable {
131
1515
                    reader.seek(save);
132
1515
                    break;
133
                } else {
134
                    return Err(e);
135
                }
136
            }
137
28485
            Ok((c, s)) => {
138
28485
                chars.push((c, s, pos));
139
28485
                end = reader.cursor();
140
            }
141
        }
142
    }
143
1515
    literal("`", reader)?;
144
1515
    let encoded_string = template::EncodedString {
145
1515
        source_info: SourceInfo::new(start.pos, end.pos),
146
1515
        chars,
147
1515
    };
148
1515
    let elements = template::templatize(encoded_string)?;
149
1515
    Ok(Template {
150
1515
        delimiter,
151
1515
        elements,
152
1515
        source_info: SourceInfo::new(start.pos, reader.cursor().pos),
153
1515
    })
154
}
155

            
156
874690
fn any_char(except: &[char], reader: &mut Reader) -> ParseResult<(char, String)> {
157
874690
    let start = reader.cursor();
158
874690
    match escape_char(reader) {
159
2320
        Ok(c) => Ok((c, reader.read_from(start.index))),
160
872370
        Err(e) => {
161
872370
            if e.recoverable {
162
872365
                reader.seek(start);
163
872365
                match reader.read() {
164
                    None => {
165
640
                        let kind = ParseErrorKind::Expecting {
166
640
                            value: "char".to_string(),
167
640
                        };
168
640
                        Err(ParseError::new(start.pos, true, kind))
169
                    }
170
871725
                    Some(c) => {
171
871725
                        if except.contains(&c)
172
848090
                            || ['\\', '\x08', '\n', '\x0c', '\r', '\t'].contains(&c)
173
                        {
174
42260
                            let kind = ParseErrorKind::Expecting {
175
42260
                                value: "char".to_string(),
176
42260
                            };
177
42260
                            Err(ParseError::new(start.pos, true, kind))
178
                        } else {
179
829465
                            Ok((c, reader.read_from(start.index)))
180
                        }
181
                    }
182
                }
183
            } else {
184
5
                Err(e)
185
            }
186
        }
187
    }
188
}
189

            
190
875425
pub fn escape_char(reader: &mut Reader) -> ParseResult<char> {
191
875425
    try_literal("\\", reader)?;
192
2365
    let start = reader.cursor();
193
2365
    match reader.read() {
194
35
        Some('#') => Ok('#'),
195
660
        Some('"') => Ok('"'),
196
        Some('`') => Ok('`'),
197
395
        Some('\\') => Ok('\\'),
198
        Some('/') => Ok('/'),
199
        Some('b') => Ok('\x08'),
200
1090
        Some('n') => Ok('\n'),
201
        Some('f') => Ok('\x0c'),
202
20
        Some('r') => Ok('\r'),
203
35
        Some('t') => Ok('\t'),
204
125
        Some('u') => unicode(reader),
205
5
        _ => Err(ParseError::new(
206
5
            start.pos,
207
5
            false,
208
5
            ParseErrorKind::EscapeChar,
209
5
        )),
210
    }
211
}
212

            
213
205
pub(crate) fn unicode(reader: &mut Reader) -> ParseResult<char> {
214
205
    literal("{", reader)?;
215
205
    let v = hex_value(reader)?;
216
205
    let c = match std::char::from_u32(v) {
217
        None => {
218
            return Err(ParseError::new(
219
                reader.cursor().pos,
220
                false,
221
                ParseErrorKind::Unicode,
222
            ))
223
        }
224
205
        Some(c) => c,
225
205
    };
226
205
    literal("}", reader)?;
227
205
    Ok(c)
228
}
229

            
230
205
fn hex_value(reader: &mut Reader) -> ParseResult<u32> {
231
205
    let mut digits = one_or_more(hex_digit, reader)?;
232
205
    let mut v = 0;
233
205
    let mut weight = 1;
234
205
    digits.reverse();
235
565
    for d in digits.iter() {
236
565
        v += weight * d;
237
565
        weight *= 16;
238
    }
239
205
    Ok(v)
240
}
241

            
242
#[cfg(test)]
mod tests {
    use std::time::Instant;

    use super::*;
    use crate::reader::Pos;

    /// An empty input gives an empty template and consumes nothing.
    #[test]
    fn test_unquoted_template_empty() {
        let mut reader = Reader::new("");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 1)),
            }
        );
        assert_eq!(reader.cursor().index, 0);
    }

    /// A raw `#` ends an unquoted template and is not consumed.
    #[test]
    fn test_unquoted_template_with_hash() {
        let mut reader = Reader::new("a#");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![TemplateElement::String {
                    value: "a".to_string(),
                    encoded: "a".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 2)),
            }
        );
        assert_eq!(reader.cursor().index, 1);
    }

    /// An escaped `#` is part of the template.
    #[test]
    fn test_unquoted_template_with_encoded_hash() {
        let mut reader = Reader::new("a\\u{23}");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![TemplateElement::String {
                    value: "a#".to_string(),
                    encoded: "a\\u{23}".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 8)),
            }
        );
        assert_eq!(reader.cursor().index, 7);
    }

    /// Double quotes are regular characters in an unquoted template.
    #[test]
    fn test_unquoted_template_with_quote() {
        let mut reader = Reader::new("\"hi\"");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![TemplateElement::String {
                    value: "\"hi\"".to_string(),
                    encoded: "\"hi\"".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 5)),
            }
        );
        assert_eq!(reader.cursor().index, 4);
    }

    /// Mix of text, escaped space and placeholder.
    #[test]
    fn test_unquoted_template_hello_world() {
        let mut reader = Reader::new("hello\\u{20}{{name}}!");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![
                    TemplateElement::String {
                        value: "hello ".to_string(),
                        encoded: "hello\\u{20}".to_string(),
                    },
                    TemplateElement::Placeholder(Placeholder {
                        space0: Whitespace {
                            value: String::new(),
                            source_info: SourceInfo::new(Pos::new(1, 14), Pos::new(1, 14)),
                        },
                        expr: Expr {
                            kind: ExprKind::Variable(Variable {
                                name: "name".to_string(),
                                source_info: SourceInfo::new(Pos::new(1, 14), Pos::new(1, 18)),
                            }),
                            source_info: SourceInfo::new(Pos::new(1, 14), Pos::new(1, 18)),
                        },
                        space1: Whitespace {
                            value: String::new(),
                            source_info: SourceInfo::new(Pos::new(1, 18), Pos::new(1, 18)),
                        },
                    }),
                    TemplateElement::String {
                        value: "!".to_string(),
                        encoded: "!".to_string(),
                    },
                ],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 21)),
            }
        );
        assert_eq!(reader.cursor().index, 20);
    }

    /// Empty, `#`-containing and brace-containing quoted templates.
    #[test]
    fn test_quoted_template() {
        let mut reader = Reader::new("\"\"");
        assert_eq!(
            quoted_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('"'),
                elements: vec![],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 3)),
            }
        );
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("\"a#\"");
        assert_eq!(
            quoted_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('"'),
                elements: vec![TemplateElement::String {
                    value: "a#".to_string(),
                    encoded: "a#".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 5)),
            }
        );
        assert_eq!(reader.cursor().index, 4);

        let mut reader = Reader::new("\"{0}\"");
        assert_eq!(
            quoted_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('"'),
                elements: vec![TemplateElement::String {
                    value: "{0}".to_string(),
                    encoded: "{0}".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 6)),
            }
        );
        assert_eq!(reader.cursor().index, 5);
    }

    /// Escaped double quotes are part of a quoted template.
    #[test]
    fn test_quoted_template_with_quote() {
        // "\"hi\""
        let mut reader = Reader::new("\"\\\"hi\\\"\"");
        assert_eq!(
            quoted_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('"'),
                elements: vec![TemplateElement::String {
                    value: "\"hi\"".to_string(),
                    encoded: "\\\"hi\\\"".to_string()
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 9)),
            }
        );
        assert_eq!(reader.cursor().index, 8);
    }

    /// A missing closing quote is a non-recoverable error at end of input.
    #[test]
    fn test_quoted_template_error_missing_closing_quote() {
        let mut reader = Reader::new("\"not found");
        let error = quoted_template(&mut reader).err().unwrap();
        assert_eq!(
            error.pos,
            Pos {
                line: 1,
                column: 11
            }
        );
        assert!(!error.recoverable);
    }

    /// Empty and non-empty one-line quoted strings.
    #[test]
    fn test_quoted_string() {
        let mut reader = Reader::new("\"\"");
        assert_eq!(quoted_oneline_string(&mut reader).unwrap(), "");
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("\"Hello\"");
        assert_eq!(quoted_oneline_string(&mut reader).unwrap(), "Hello");
        assert_eq!(reader.cursor().index, 7);
    }

    /// Empty, `#`-containing and brace-containing backtick templates.
    #[test]
    fn test_backtick_template() {
        let mut reader = Reader::new("``");
        assert_eq!(
            backtick_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('`'),
                elements: vec![],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 3)),
            }
        );
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("`foo#`");
        assert_eq!(
            backtick_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('`'),
                elements: vec![TemplateElement::String {
                    value: "foo#".to_string(),
                    encoded: "foo#".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 7)),
            }
        );
        assert_eq!(reader.cursor().index, 6);

        let mut reader = Reader::new("`{0}`");
        assert_eq!(
            backtick_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('`'),
                elements: vec![TemplateElement::String {
                    value: "{0}".to_string(),
                    encoded: "{0}".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 6)),
            }
        );
        assert_eq!(reader.cursor().index, 5);
    }

    /// Escaped backticks are part of a backtick template.
    #[test]
    fn test_backtick_template_with_backtick() {
        // `\`hi\``
        let mut reader = Reader::new("`\\`hi\\``");
        assert_eq!(
            backtick_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('`'),
                elements: vec![TemplateElement::String {
                    value: "`hi`".to_string(),
                    encoded: "\\`hi\\`".to_string()
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 9)),
            }
        );
        assert_eq!(reader.cursor().index, 8);
    }

    /// A missing closing backtick is a non-recoverable error at end of input.
    #[test]
    fn test_backtick_template_error_missing_closing_backtick() {
        let mut reader = Reader::new("`not found");
        let error = backtick_template(&mut reader).err().unwrap();
        assert_eq!(
            error.pos,
            Pos {
                line: 1,
                column: 11
            }
        );
        assert!(!error.recoverable);
    }

    /// Raw characters and escape sequences accepted by `any_char`.
    #[test]
    fn test_any_char() {
        let mut reader = Reader::new("a");
        assert_eq!(any_char(&[], &mut reader).unwrap(), ('a', "a".to_string()));
        assert_eq!(reader.cursor().index, 1);

        let mut reader = Reader::new(" ");
        assert_eq!(any_char(&[], &mut reader).unwrap(), (' ', " ".to_string()));
        assert_eq!(reader.cursor().index, 1);

        let mut reader = Reader::new("\\t");
        assert_eq!(
            any_char(&[], &mut reader).unwrap(),
            ('\t', "\\t".to_string())
        );
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("#");
        assert_eq!(any_char(&[], &mut reader).unwrap(), ('#', "#".to_string()));
        assert_eq!(reader.cursor().index, 1);
    }

    /// An escaped double quote is decoded, its encoded form is kept.
    #[test]
    fn test_any_char_quote() {
        let mut reader = Reader::new("\\\"");
        assert_eq!(
            any_char(&[], &mut reader).unwrap(),
            ('"', "\\\"".to_string())
        );
        assert_eq!(reader.cursor().index, 2);
    }

    /// End of input, excluded and control characters are recoverable errors.
    #[test]
    fn test_any_char_error() {
        let mut reader = Reader::new("");
        let error = any_char(&[], &mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert!(error.recoverable);

        let mut reader = Reader::new("#");
        let error = any_char(&['#'], &mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert!(error.recoverable);

        let mut reader = Reader::new("\t");
        let error = any_char(&[], &mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert!(error.recoverable);
    }

    /// Simple and unicode escapes; a missing backslash is recoverable and
    /// consumes nothing.
    #[test]
    fn test_escape_char() {
        let mut reader = Reader::new("\\n");
        assert_eq!(escape_char(&mut reader).unwrap(), '\n');
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("\\u{0a}");
        assert_eq!(escape_char(&mut reader).unwrap(), '\n');
        assert_eq!(reader.cursor().index, 6);

        let mut reader = Reader::new("x");
        let error = escape_char(&mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert_eq!(
            error.kind,
            ParseErrorKind::Expecting {
                value: "\\".to_string()
            }
        );
        assert!(error.recoverable);
        assert_eq!(reader.cursor().index, 0);
    }

    /// Unicode escapes with and without leading zeros.
    #[test]
    fn test_unicode() {
        let mut reader = Reader::new("{000a}");
        assert_eq!(unicode(&mut reader).unwrap(), '\n');
        assert_eq!(reader.cursor().index, 6);

        let mut reader = Reader::new("{E9}");
        assert_eq!(unicode(&mut reader).unwrap(), 'é');
        assert_eq!(reader.cursor().index, 4);
    }

    /// Hexadecimal value parsing stops at the first non-hex character and
    /// requires at least one digit.
    #[test]
    fn test_hex_value() {
        let mut reader = Reader::new("20x");
        assert_eq!(hex_value(&mut reader).unwrap(), 32);

        let mut reader = Reader::new("x");
        let error = hex_value(&mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert_eq!(error.kind, ParseErrorKind::HexDigit);
        assert!(!error.recoverable);
    }

    /// Checks that parsing a short quoted template stops at the closing quote
    /// of a very large input, and logs how long it took.
    #[test]
    fn test_quoted_template_benchmark() {
        // benchmark tests not in stable toolchain yet
        // Simply log duration for the time-being
        let input = format!("\"Hello World!\"{}", "X".repeat(10_000_000));
        let mut reader = Reader::new(input.as_str());

        // `Instant` is monotonic: measuring with it cannot fail, whereas
        // `SystemTime::elapsed` errors if the wall clock goes backwards.
        let now = Instant::now();
        assert!(quoted_template(&mut reader).is_ok());
        assert_eq!(reader.cursor().index, 14);
        eprintln!("duration= {}", now.elapsed().as_nanos());
    }
}