1
/*
 * Hurl (https://hurl.dev)
 * Copyright (C) 2024 Orange
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *          http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
18
use std::fs::File;
19
use std::io::{ErrorKind, Read};
20
use std::path::{Path, PathBuf};
21
use std::{fmt, fs, io};
22

            
23
/// Represents the input of read operation: can be either a file or standard input.
24
#[derive(Clone, Debug, PartialEq, Eq)]
25
pub struct Input {
26
    /// Kind of input: either sourced from a file source, or from standard input.
27
    kind: InputKind,
28
}
29

            
30
impl fmt::Display for Input {
31
17375
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
32
17375
        self.kind.fmt(f)
33
    }
34
}
35

            
36
impl Input {
37
    /// Creates an input from a path source.
38
    pub fn new(path: &str) -> Self {
39
        let kind = InputKind::File(PathBuf::from(path));
40
        Input { kind }
41
    }
42

            
43
    /// Creates an input from standard input.
44
    /// The content of the standard input is read once and then cached. It can be re-read multiple
45
    /// times.
46
10
    pub fn from_stdin() -> Result<Self, io::Error> {
47
10
        let mut contents = String::new();
48
10
        io::stdin().read_to_string(&mut contents)?;
49
10
        let kind = InputKind::Stdin(contents);
50
10
        Ok(Input { kind })
51
    }
52

            
53
2625
    pub fn kind(&self) -> &InputKind {
54
2625
        &self.kind
55
    }
56

            
57
    /// Reads the content of this input to a string, removing any BOM.
58
3505
    pub fn read_to_string(&self) -> Result<String, io::Error> {
59
3505
        self.kind.read_to_string()
60
    }
61
}
62

            
63
/// The different sources a read operation can use.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum InputKind {
    /// Content is read from the file at this path.
    File(PathBuf),
    /// Content comes from standard input. The variant holds the text that has already been read,
    /// so that it can be returned multiple times.
    Stdin(String),
}
72

            
73
impl fmt::Display for InputKind {
74
17375
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
75
17375
        let output = match self {
76
17325
            InputKind::File(file) => file.to_string_lossy().to_string(),
77
50
            InputKind::Stdin(_) => "-".to_string(),
78
        };
79
17375
        write!(f, "{output}")
80
    }
81
}
82

            
83
impl From<&Path> for Input {
84
2810
    fn from(value: &Path) -> Self {
85
2810
        let kind = InputKind::File(value.to_path_buf());
86
2810
        Input { kind }
87
    }
88
}
89

            
90
impl From<PathBuf> for Input {
91
65
    fn from(value: PathBuf) -> Self {
92
65
        let kind = InputKind::File(value);
93
65
        Input { kind }
94
    }
95
}
96

            
97
impl InputKind {
98
    /// Reads the content of this input to a string, removing any BOM.
99
3505
    fn read_to_string(&self) -> Result<String, io::Error> {
100
3505
        match self {
101
3450
            InputKind::File(path) => {
102
3450
                let mut f = File::open(path)?;
103
3450
                let metadata = fs::metadata(path).unwrap();
104
3450
                let mut buffer = vec![0; metadata.len() as usize];
105
3450
                f.read_exact(&mut buffer)?;
106
3450
                string_from_utf8(buffer)
107
            }
108
55
            InputKind::Stdin(cached) => Ok(cached.clone()),
109
        }
110
    }
111
}
112

            
113
3450
fn string_from_utf8(buffer: Vec<u8>) -> Result<String, io::Error> {
114
3450
    let mut buffer = buffer;
115
3450
    strip_bom(&mut buffer);
116
3453
    String::from_utf8(buffer).map_err(|e| io::Error::new(ErrorKind::InvalidData, e))
117
}
118

            
119
/// Removes the UTF-8 byte order mark (`EF BB BF`) from the start of `bytes`, if present.
///
/// Bytes that do not start with the complete three-byte mark are left untouched.
fn strip_bom(bytes: &mut Vec<u8>) {
    const UTF8_BOM: [u8; 3] = [0xef, 0xbb, 0xbf];
    if bytes.starts_with(&UTF8_BOM) {
        bytes.drain(..UTF8_BOM.len());
    }
}
125

            
126
#[cfg(test)]
pub mod tests {
    use super::*;

    /// `strip_bom` removes a leading UTF-8 BOM and leaves any other input untouched.
    #[test]
    fn test_strip_bom() {
        let mut bytes = vec![];
        strip_bom(&mut bytes);
        assert!(bytes.is_empty());

        let mut bytes = vec![0xef, 0xbb, 0xbf, 0x68, 0x65, 0x6c, 0x6c, 0x6f];
        strip_bom(&mut bytes);
        assert_eq!(bytes, vec![0x68, 0x65, 0x6c, 0x6c, 0x6f]);

        let mut bytes = vec![0x68, 0x65, 0x6c, 0x6c, 0x6f];
        strip_bom(&mut bytes);
        assert_eq!(bytes, vec![0x68, 0x65, 0x6c, 0x6c, 0x6f]);

        // A truncated mark is not a BOM and must be kept.
        let mut bytes = vec![0xef, 0xbb];
        strip_bom(&mut bytes);
        assert_eq!(bytes, vec![0xef, 0xbb]);

        // Only a mark at the very start of the input is removed.
        let mut bytes = vec![0x68, 0xef, 0xbb, 0xbf];
        strip_bom(&mut bytes);
        assert_eq!(bytes, vec![0x68, 0xef, 0xbb, 0xbf]);
    }

    /// `string_from_utf8` decodes UTF-8 bytes with or without BOM and rejects invalid input.
    #[test]
    fn test_string_from_utf8_bom() {
        assert_eq!(string_from_utf8(vec![]).unwrap(), "");
        assert_eq!(
            string_from_utf8(vec![0xef, 0xbb, 0xbf, 0x68, 0x65, 0x6c, 0x6c, 0x6f]).unwrap(),
            "hello"
        );
        assert_eq!(
            string_from_utf8(vec![0x68, 0x65, 0x6c, 0x6c, 0x6f]).unwrap(),
            "hello"
        );

        // A lone first byte of the mark is not a BOM: it is kept and fails UTF-8 decoding.
        let err = string_from_utf8(vec![0xef]).unwrap_err();
        assert_eq!(err.kind(), ErrorKind::InvalidData);
        assert_eq!(
            err.to_string(),
            "incomplete utf-8 byte sequence from index 0"
        );
    }
}