1
/*
2
 * Hurl (https://hurl.dev)
3
 * Copyright (C) 2024 Orange
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 *          http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 */
18
use crate::ast::{SourceInfo, Template};
19
use crate::combinator::one_or_more;
20
use crate::parser::primitives::{hex_digit, literal, try_literal};
21
use crate::parser::{template, ParseError, ParseErrorKind, ParseResult};
22
use crate::reader::Reader;
23

            
24
/// Steps:
25
/// 1- parse String until end of stream, end of line
26
///    the string does not contain trailing space
27
/// 2- templatize
28
19525
pub fn unquoted_template(reader: &mut Reader) -> ParseResult<Template> {
29
19525
    let start = reader.cursor();
30
19525
    let mut chars = vec![];
31
19525
    let mut spaces = vec![];
32
19525
    let mut end = start;
33
    loop {
34
552825
        let pos = reader.cursor().pos;
35
552825
        match any_char(&['#'], reader) {
36
19525
            Err(e) => {
37
19525
                if e.recoverable {
38
19525
                    break;
39
                } else {
40
                    return Err(e);
41
                }
42
            }
43
533300
            Ok((c, s)) => {
44
533300
                if s == "\n" {
45
                    break;
46
                }
47
533300
                if s == " " {
48
1640
                    spaces.push((c, s, pos));
49
1640
                } else {
50
531660
                    if !spaces.is_empty() {
51
1100
                        chars.append(&mut spaces);
52
1100
                        spaces = vec![];
53
                    }
54
531660
                    chars.push((c, s, pos));
55
531660
                    end = reader.cursor();
56
                }
57
            }
58
        }
59
    }
60
19525
    reader.seek(end);
61
19525
    let encoded_string = template::EncodedString {
62
19525
        source_info: SourceInfo::new(start.pos, end.pos),
63
19525
        chars,
64
19525
    };
65
19525
    let elements = template::templatize(encoded_string)?;
66
19520
    Ok(Template {
67
19520
        delimiter: None,
68
19520
        elements,
69
19520
        source_info: SourceInfo::new(start.pos, end.pos),
70
19520
    })
71
}
72

            
73
// TODO: should return an EncodedString
74
// (decoding escape sequence)
75
640
pub fn quoted_oneline_string(reader: &mut Reader) -> ParseResult<String> {
76
640
    literal("\"", reader)?;
77
8853
    let s = reader.read_while(|c| c != '"' && c != '\n');
78
640
    literal("\"", reader)?;
79
635
    Ok(s)
80
}
81

            
82
23075
pub fn quoted_template(reader: &mut Reader) -> ParseResult<Template> {
83
23075
    let start = reader.cursor();
84
23075
    let mut end = start;
85
23075
    try_literal("\"", reader)?;
86
22140
    let mut chars = vec![];
87
    loop {
88
294445
        let pos = reader.cursor().pos;
89
294445
        let save = reader.cursor();
90
294445
        match any_char(&['"'], reader) {
91
22140
            Err(e) => {
92
22140
                if e.recoverable {
93
22135
                    reader.seek(save);
94
22135
                    break;
95
                } else {
96
5
                    return Err(e);
97
                }
98
            }
99
272305
            Ok((c, s)) => {
100
272305
                chars.push((c, s, pos));
101
272305
                end = reader.cursor();
102
            }
103
        }
104
    }
105
22135
    literal("\"", reader)?;
106
22130
    let encoded_string = template::EncodedString {
107
22130
        source_info: SourceInfo::new(start.pos, end.pos),
108
22130
        chars,
109
22130
    };
110
22130
    let elements = template::templatize(encoded_string)?;
111
22130
    Ok(Template {
112
22130
        delimiter: Some('"'),
113
22130
        elements,
114
22130
        source_info: SourceInfo::new(start.pos, reader.cursor().pos),
115
22130
    })
116
}
117

            
118
26645
pub fn backtick_template(reader: &mut Reader) -> ParseResult<Template> {
119
26645
    let delimiter = Some('`');
120
26645
    let start = reader.cursor();
121
26645
    let mut end = start;
122
26645
    try_literal("`", reader)?;
123
1515
    let mut chars = vec![];
124
    loop {
125
30000
        let pos = reader.cursor().pos;
126
30000
        let save = reader.cursor();
127
30000
        match any_char(&['`', '\n'], reader) {
128
1515
            Err(e) => {
129
1515
                if e.recoverable {
130
1515
                    reader.seek(save);
131
1515
                    break;
132
                } else {
133
                    return Err(e);
134
                }
135
            }
136
28485
            Ok((c, s)) => {
137
28485
                chars.push((c, s, pos));
138
28485
                end = reader.cursor();
139
            }
140
        }
141
    }
142
1515
    literal("`", reader)?;
143
1515
    let encoded_string = template::EncodedString {
144
1515
        source_info: SourceInfo::new(start.pos, end.pos),
145
1515
        chars,
146
1515
    };
147
1515
    let elements = template::templatize(encoded_string)?;
148
1515
    Ok(Template {
149
1515
        delimiter,
150
1515
        elements,
151
1515
        source_info: SourceInfo::new(start.pos, reader.cursor().pos),
152
1515
    })
153
}
154

            
155
877270
fn any_char(except: &[char], reader: &mut Reader) -> ParseResult<(char, String)> {
156
877270
    let start = reader.cursor();
157
877270
    match escape_char(reader) {
158
2320
        Ok(c) => Ok((c, reader.read_from(start.index))),
159
874950
        Err(e) => {
160
874950
            if e.recoverable {
161
874945
                reader.seek(start);
162
874945
                match reader.read() {
163
                    None => {
164
640
                        let kind = ParseErrorKind::Expecting {
165
640
                            value: "char".to_string(),
166
640
                        };
167
640
                        Err(ParseError::new(start.pos, true, kind))
168
                    }
169
874305
                    Some(c) => {
170
874305
                        if except.contains(&c)
171
850480
                            || ['\\', '\x08', '\n', '\x0c', '\r', '\t'].contains(&c)
172
                        {
173
42535
                            let kind = ParseErrorKind::Expecting {
174
42535
                                value: "char".to_string(),
175
42535
                            };
176
42535
                            Err(ParseError::new(start.pos, true, kind))
177
                        } else {
178
831770
                            Ok((c, reader.read_from(start.index)))
179
                        }
180
                    }
181
                }
182
            } else {
183
5
                Err(e)
184
            }
185
        }
186
    }
187
}
188

            
189
878005
pub fn escape_char(reader: &mut Reader) -> ParseResult<char> {
190
878005
    try_literal("\\", reader)?;
191
2365
    let start = reader.cursor();
192
2365
    match reader.read() {
193
35
        Some('#') => Ok('#'),
194
660
        Some('"') => Ok('"'),
195
        Some('`') => Ok('`'),
196
395
        Some('\\') => Ok('\\'),
197
        Some('/') => Ok('/'),
198
        Some('b') => Ok('\x08'),
199
1090
        Some('n') => Ok('\n'),
200
        Some('f') => Ok('\x0c'),
201
20
        Some('r') => Ok('\r'),
202
35
        Some('t') => Ok('\t'),
203
125
        Some('u') => unicode(reader),
204
5
        _ => Err(ParseError::new(
205
5
            start.pos,
206
5
            false,
207
5
            ParseErrorKind::EscapeChar,
208
5
        )),
209
    }
210
}
211

            
212
205
pub(crate) fn unicode(reader: &mut Reader) -> ParseResult<char> {
213
205
    literal("{", reader)?;
214
205
    let v = hex_value(reader)?;
215
205
    let c = match std::char::from_u32(v) {
216
        None => {
217
            return Err(ParseError::new(
218
                reader.cursor().pos,
219
                false,
220
                ParseErrorKind::Unicode,
221
            ))
222
        }
223
205
        Some(c) => c,
224
205
    };
225
205
    literal("}", reader)?;
226
205
    Ok(c)
227
}
228

            
229
205
fn hex_value(reader: &mut Reader) -> ParseResult<u32> {
230
205
    let mut digits = one_or_more(hex_digit, reader)?;
231
205
    let mut v = 0;
232
205
    let mut weight = 1;
233
205
    digits.reverse();
234
565
    for d in digits.iter() {
235
565
        v += weight * d;
236
565
        weight *= 16;
237
    }
238
205
    Ok(v)
239
}
240

            
241
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::{Expr, ExprKind, Placeholder, TemplateElement, Variable, Whitespace};
    use crate::reader::Pos;
    use std::time::Instant;

    /// An empty input yields an empty template and consumes nothing.
    #[test]
    fn test_unquoted_template_empty() {
        let mut reader = Reader::new("");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 1)),
            }
        );
        assert_eq!(reader.cursor().index, 0);
    }

    /// A raw `#` ends an unquoted template and is left unread.
    #[test]
    fn test_unquoted_template_with_hash() {
        let mut reader = Reader::new("a#");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![TemplateElement::String {
                    value: "a".to_string(),
                    encoded: "a".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 2)),
            }
        );
        assert_eq!(reader.cursor().index, 1);
    }

    /// A `#` written as a unicode escape is part of the template.
    #[test]
    fn test_unquoted_template_with_encoded_hash() {
        let mut reader = Reader::new("a\\u{23}");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![TemplateElement::String {
                    value: "a#".to_string(),
                    encoded: "a\\u{23}".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 8)),
            }
        );
        assert_eq!(reader.cursor().index, 7);
    }

    /// Double quotes have no special meaning in an unquoted template.
    #[test]
    fn test_unquoted_template_with_quote() {
        let mut reader = Reader::new("\"hi\"");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![TemplateElement::String {
                    value: "\"hi\"".to_string(),
                    encoded: "\"hi\"".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 5)),
            }
        );
        assert_eq!(reader.cursor().index, 4);
    }

    /// Mix of a string with an escaped space, a placeholder and a string.
    #[test]
    fn test_unquoted_template_hello_world() {
        let mut reader = Reader::new("hello\\u{20}{{name}}!");
        assert_eq!(
            unquoted_template(&mut reader).unwrap(),
            Template {
                delimiter: None,
                elements: vec![
                    TemplateElement::String {
                        value: "hello ".to_string(),
                        encoded: "hello\\u{20}".to_string(),
                    },
                    TemplateElement::Placeholder(Placeholder {
                        space0: Whitespace {
                            value: String::new(),
                            source_info: SourceInfo::new(Pos::new(1, 14), Pos::new(1, 14)),
                        },
                        expr: Expr {
                            kind: ExprKind::Variable(Variable {
                                name: "name".to_string(),
                                source_info: SourceInfo::new(Pos::new(1, 14), Pos::new(1, 18)),
                            }),
                            source_info: SourceInfo::new(Pos::new(1, 14), Pos::new(1, 18)),
                        },
                        space1: Whitespace {
                            value: String::new(),
                            source_info: SourceInfo::new(Pos::new(1, 18), Pos::new(1, 18)),
                        },
                    }),
                    TemplateElement::String {
                        value: "!".to_string(),
                        encoded: "!".to_string(),
                    },
                ],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 21)),
            }
        );
        assert_eq!(reader.cursor().index, 20);
    }

    /// Empty, `#`-containing and single-brace quoted templates.
    #[test]
    fn test_quoted_template() {
        let mut reader = Reader::new("\"\"");
        assert_eq!(
            quoted_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('"'),
                elements: vec![],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 3)),
            }
        );
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("\"a#\"");
        assert_eq!(
            quoted_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('"'),
                elements: vec![TemplateElement::String {
                    value: "a#".to_string(),
                    encoded: "a#".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 5)),
            }
        );
        assert_eq!(reader.cursor().index, 4);

        let mut reader = Reader::new("\"{0}\"");
        assert_eq!(
            quoted_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('"'),
                elements: vec![TemplateElement::String {
                    value: "{0}".to_string(),
                    encoded: "{0}".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 6)),
            }
        );
        assert_eq!(reader.cursor().index, 5);
    }

    /// Escaped double quotes are part of the quoted template.
    #[test]
    fn test_quoted_template_with_quote() {
        // "\"hi\""
        let mut reader = Reader::new("\"\\\"hi\\\"\"");
        assert_eq!(
            quoted_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('"'),
                elements: vec![TemplateElement::String {
                    value: "\"hi\"".to_string(),
                    encoded: "\\\"hi\\\"".to_string()
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 9)),
            }
        );
        assert_eq!(reader.cursor().index, 8);
    }

    /// A missing closing quote is a non-recoverable error at end of input.
    #[test]
    fn test_quoted_template_error_missing_closing_quote() {
        let mut reader = Reader::new("\"not found");
        let error = quoted_template(&mut reader).err().unwrap();
        assert_eq!(
            error.pos,
            Pos {
                line: 1,
                column: 11
            }
        );
        assert!(!error.recoverable);
    }

    /// Raw content of one-line quoted strings.
    #[test]
    fn test_quoted_string() {
        let mut reader = Reader::new("\"\"");
        assert_eq!(quoted_oneline_string(&mut reader).unwrap(), "");
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("\"Hello\"");
        assert_eq!(quoted_oneline_string(&mut reader).unwrap(), "Hello");
        assert_eq!(reader.cursor().index, 7);
    }

    /// Empty, `#`-containing and single-brace backtick templates.
    #[test]
    fn test_backtick_template() {
        let mut reader = Reader::new("``");
        assert_eq!(
            backtick_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('`'),
                elements: vec![],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 3)),
            }
        );
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("`foo#`");
        assert_eq!(
            backtick_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('`'),
                elements: vec![TemplateElement::String {
                    value: "foo#".to_string(),
                    encoded: "foo#".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 7)),
            }
        );
        assert_eq!(reader.cursor().index, 6);

        let mut reader = Reader::new("`{0}`");
        assert_eq!(
            backtick_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('`'),
                elements: vec![TemplateElement::String {
                    value: "{0}".to_string(),
                    encoded: "{0}".to_string(),
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 6)),
            }
        );
        assert_eq!(reader.cursor().index, 5);
    }

    /// Escaped backticks are part of the backtick template.
    #[test]
    fn test_backtick_template_with_backtick() {
        // `\`hi\``
        let mut reader = Reader::new("`\\`hi\\``");
        assert_eq!(
            backtick_template(&mut reader).unwrap(),
            Template {
                delimiter: Some('`'),
                elements: vec![TemplateElement::String {
                    value: "`hi`".to_string(),
                    encoded: "\\`hi\\`".to_string()
                }],
                source_info: SourceInfo::new(Pos::new(1, 1), Pos::new(1, 9)),
            }
        );
        assert_eq!(reader.cursor().index, 8);
    }

    /// A missing closing backtick is a non-recoverable error at end of input.
    #[test]
    fn test_backtick_template_error_missing_closing_backtick() {
        let mut reader = Reader::new("`not found");
        let error = backtick_template(&mut reader).err().unwrap();
        assert_eq!(
            error.pos,
            Pos {
                line: 1,
                column: 11
            }
        );
        assert!(!error.recoverable);
    }

    /// Raw chars and escape sequences accepted by `any_char`.
    #[test]
    fn test_any_char() {
        let mut reader = Reader::new("a");
        assert_eq!(any_char(&[], &mut reader).unwrap(), ('a', "a".to_string()));
        assert_eq!(reader.cursor().index, 1);

        let mut reader = Reader::new(" ");
        assert_eq!(any_char(&[], &mut reader).unwrap(), (' ', " ".to_string()));
        assert_eq!(reader.cursor().index, 1);

        let mut reader = Reader::new("\\t");
        assert_eq!(
            any_char(&[], &mut reader).unwrap(),
            ('\t', "\\t".to_string())
        );
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("#");
        assert_eq!(any_char(&[], &mut reader).unwrap(), ('#', "#".to_string()));
        assert_eq!(reader.cursor().index, 1);
    }

    /// An escaped double quote is decoded to a double quote.
    #[test]
    fn test_any_char_quote() {
        let mut reader = Reader::new("\\\"");
        assert_eq!(
            any_char(&[], &mut reader).unwrap(),
            ('"', "\\\"".to_string())
        );
        assert_eq!(reader.cursor().index, 2);
    }

    /// End of stream, excluded chars and raw control chars are recoverable errors.
    #[test]
    fn test_any_char_error() {
        let mut reader = Reader::new("");
        let error = any_char(&[], &mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert!(error.recoverable);

        let mut reader = Reader::new("#");
        let error = any_char(&['#'], &mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert!(error.recoverable);

        let mut reader = Reader::new("\t");
        let error = any_char(&[], &mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert!(error.recoverable);
    }

    /// Simple and unicode escapes; a missing backslash is recoverable.
    #[test]
    fn test_escape_char() {
        let mut reader = Reader::new("\\n");
        assert_eq!(escape_char(&mut reader).unwrap(), '\n');
        assert_eq!(reader.cursor().index, 2);

        let mut reader = Reader::new("\\u{0a}");
        assert_eq!(escape_char(&mut reader).unwrap(), '\n');
        assert_eq!(reader.cursor().index, 6);

        let mut reader = Reader::new("x");
        let error = escape_char(&mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert_eq!(
            error.kind,
            ParseErrorKind::Expecting {
                value: "\\".to_string()
            }
        );
        assert!(error.recoverable);
        assert_eq!(reader.cursor().index, 0);
    }

    /// Braced hex code points, with and without leading zeros.
    #[test]
    fn test_unicode() {
        let mut reader = Reader::new("{000a}");
        assert_eq!(unicode(&mut reader).unwrap(), '\n');
        assert_eq!(reader.cursor().index, 6);

        let mut reader = Reader::new("{E9}");
        assert_eq!(unicode(&mut reader).unwrap(), 'é');
        assert_eq!(reader.cursor().index, 4);
    }

    /// Hex digits are read up to the first non-hex char; at least one is required.
    #[test]
    fn test_hex_value() {
        let mut reader = Reader::new("20x");
        assert_eq!(hex_value(&mut reader).unwrap(), 32);

        let mut reader = Reader::new("x");
        let error = hex_value(&mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert_eq!(error.kind, ParseErrorKind::HexDigit);
        assert!(!error.recoverable);
    }

    /// Parsing a short quoted template must not depend on the size of the
    /// remaining input.
    #[test]
    fn test_quoted_template_benchmark() {
        // benchmark tests not in stable toolchain yet
        // Simply log duration for the time-being
        let input = format!("\"Hello World!\"{}", "X".repeat(10_000_000));
        let mut reader = Reader::new(input.as_str());

        // `Instant` is monotonic: unlike `SystemTime`, measuring an elapsed
        // duration with it cannot fail when the system clock is adjusted.
        let now = Instant::now();
        assert!(quoted_template(&mut reader).is_ok());
        assert_eq!(reader.cursor().index, 14);
        eprintln!("duration= {}", now.elapsed().as_nanos());
    }
}