Skip to content

Commit 83d9114

Browse files
committed
allow parsing of PEM files that contain headers
As reported in #36, PEM files can have an optional header section after the BEGIN line and before the base64-encoded data (RFC 1421, https://www.rfc-editor.org/rfc/rfc1421, describes it well). At the very least, we need to skip past the headers and retrieve the base64-encoded data that follows them. To do that, I refactored the parsing code to make it clearer what is going on and to make it easy to strip out the headers. Additionally, I added a small helper that decodes the base64-encoded data; the tests run it on the expected base64 data to ensure that nothing is left behind when the headers are skipped.
1 parent 8a74310 commit 83d9114

File tree

2 files changed

+164
-15
lines changed

2 files changed

+164
-15
lines changed

src/lib.rs

Lines changed: 133 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
//! extern crate pem;
2929
//! ```
3030
//!
31-
//! Using the `serde` feature will implement the serde traits for
31+
//! Using the `serde` feature will implement the serde traits for
3232
//! the `Pem` struct.
3333
//!
3434
//! # Example: parse a single chunk of PEM-encoded text
@@ -141,6 +141,17 @@ pub struct Pem {
141141
pub contents: Vec<u8>,
142142
}
143143

144+
/// Decodes the base64 body of a PEM section into raw bytes.
///
/// `raw_data` may span several lines; line terminators and any trailing
/// whitespace on each line are dropped before decoding.
///
/// # Errors
///
/// Returns `PemError::InvalidData` when the joined text is not valid
/// standard-alphabet base64.
fn decode_data(raw_data: &str) -> Result<Vec<u8>> {
    // The decoder wants one contiguous buffer, so the lines are copied
    // into a single string with their line endings stripped.
    let mut joined = String::new();
    for line in raw_data.lines() {
        joined.push_str(line.trim_end());
    }

    base64::decode_config(&joined, base64::STANDARD).map_err(PemError::InvalidData)
}
154+
144155
impl Pem {
145156
fn new_from_captures(caps: Captures) -> Result<Pem> {
146157
fn as_utf8<'a>(bytes: &'a [u8]) -> Result<&'a str> {
@@ -166,14 +177,7 @@ impl Pem {
166177

167178
// If they did, then we can grab the data section
168179
let raw_data = as_utf8(caps.data)?;
169-
170-
// We need to get rid of newlines for base64::decode
171-
// As base64 requires an AsRef<[u8]>, this must involve a copy
172-
let data: String = raw_data.lines().map(str::trim_end).collect();
173-
174-
// And decode it from Base64 into a vector of u8
175-
let contents =
176-
base64::decode_config(&data, base64::STANDARD).map_err(PemError::InvalidData)?;
180+
let contents = decode_data(raw_data)?;
177181

178182
Ok(Pem {
179183
tag: tag.to_owned(),
@@ -673,4 +677,124 @@ RzHX0lkJl9Stshd/7Gbt65/QYq+v+xvAeT0CoyIg
673677
let result = serde_json::from_str(&value).unwrap();
674678
assert_eq!(pem, result);
675679
}
680+
681+
const HEADER_CRLF: &'static str = "-----BEGIN CERTIFICATE-----\r
682+
MIIBPQIBAAJBAOsfi5AGYhdRs/x6q5H7kScxA0Kzzqe6WI6gf6+tc6IvKQJo5rQc\r
683+
dWWSQ0nRGt2hOPDO+35NKhQEjBQxPh/v7n0CAwEAAQJBAOGaBAyuw0ICyENy5NsO\r
684+
2gkT00AWTSzM9Zns0HedY31yEabkuFvrMCHjscEF7u3Y6PB7An3IzooBHchsFDei\r
685+
AAECIQD/JahddzR5K3A6rzTidmAf1PBtqi7296EnWv8WvpfAAQIhAOvowIXZI4Un\r
686+
DXjgZ9ekuUjZN+GUQRAVlkEEohGLVy59AiEA90VtqDdQuWWpvJX0cM08V10tLXrT\r
687+
TTGsEtITid1ogAECIQDAaFl90ZgS5cMrL3wCeatVKzVUmuJmB/VAmlLFFGzK0QIh\r
688+
ANJGc7AFk4fyFD/OezhwGHbWmo/S+bfeAiIh2Ss2FxKJ\r
689+
-----END CERTIFICATE-----\r
690+
-----BEGIN RSA PRIVATE KEY-----\r
691+
Proc-Type: 4,ENCRYPTED\r
692+
DEK-Info: AES-256-CBC,975C518B7D2CCD1164A3354D1F89C5A6\r
693+
\r
694+
MIIBOgIBAAJBAMIeCnn9G/7g2Z6J+qHOE2XCLLuPoh5NHTO2Fm+PbzBvafBo0oYo\r
695+
QVVy7frzxmOqx6iIZBxTyfAQqBPO3Br59BMCAwEAAQJAX+PjHPuxdqiwF6blTkS0\r
696+
RFI1MrnzRbCmOkM6tgVO0cd6r5Z4bDGLusH9yjI9iI84gPRjK0AzymXFmBGuREHI\r
697+
sQIhAPKf4pp+Prvutgq2ayygleZChBr1DC4XnnufBNtaswyvAiEAzNGVKgNvzuhk\r
698+
ijoUXIDruJQEGFGvZTsi1D2RehXiT90CIQC4HOQUYKCydB7oWi1SHDokFW2yFyo6\r
699+
/+lf3fgNjPI6OQIgUPmTFXciXxT1msh3gFLf3qt2Kv8wbr9Ad9SXjULVpGkCIB+g\r
700+
RzHX0lkJl9Stshd/7Gbt65/QYq+v+xvAeT0CoyIg\r
701+
-----END RSA PRIVATE KEY-----\r
702+
";
703+
const HEADER_CRLF_DATA: [&'static str; 2] = [
704+
"MIIBPQIBAAJBAOsfi5AGYhdRs/x6q5H7kScxA0Kzzqe6WI6gf6+tc6IvKQJo5rQc\r
705+
dWWSQ0nRGt2hOPDO+35NKhQEjBQxPh/v7n0CAwEAAQJBAOGaBAyuw0ICyENy5NsO\r
706+
2gkT00AWTSzM9Zns0HedY31yEabkuFvrMCHjscEF7u3Y6PB7An3IzooBHchsFDei\r
707+
AAECIQD/JahddzR5K3A6rzTidmAf1PBtqi7296EnWv8WvpfAAQIhAOvowIXZI4Un\r
708+
DXjgZ9ekuUjZN+GUQRAVlkEEohGLVy59AiEA90VtqDdQuWWpvJX0cM08V10tLXrT\r
709+
TTGsEtITid1ogAECIQDAaFl90ZgS5cMrL3wCeatVKzVUmuJmB/VAmlLFFGzK0QIh\r
710+
ANJGc7AFk4fyFD/OezhwGHbWmo/S+bfeAiIh2Ss2FxKJ\r",
711+
"MIIBOgIBAAJBAMIeCnn9G/7g2Z6J+qHOE2XCLLuPoh5NHTO2Fm+PbzBvafBo0oYo\r
712+
QVVy7frzxmOqx6iIZBxTyfAQqBPO3Br59BMCAwEAAQJAX+PjHPuxdqiwF6blTkS0\r
713+
RFI1MrnzRbCmOkM6tgVO0cd6r5Z4bDGLusH9yjI9iI84gPRjK0AzymXFmBGuREHI\r
714+
sQIhAPKf4pp+Prvutgq2ayygleZChBr1DC4XnnufBNtaswyvAiEAzNGVKgNvzuhk\r
715+
ijoUXIDruJQEGFGvZTsi1D2RehXiT90CIQC4HOQUYKCydB7oWi1SHDokFW2yFyo6\r
716+
/+lf3fgNjPI6OQIgUPmTFXciXxT1msh3gFLf3qt2Kv8wbr9Ad9SXjULVpGkCIB+g\r
717+
RzHX0lkJl9Stshd/7Gbt65/QYq+v+xvAeT0CoyIg\r",
718+
];
719+
720+
const HEADER_LF: &'static str = "-----BEGIN CERTIFICATE-----
721+
MIIBPQIBAAJBAOsfi5AGYhdRs/x6q5H7kScxA0Kzzqe6WI6gf6+tc6IvKQJo5rQc
722+
dWWSQ0nRGt2hOPDO+35NKhQEjBQxPh/v7n0CAwEAAQJBAOGaBAyuw0ICyENy5NsO
723+
2gkT00AWTSzM9Zns0HedY31yEabkuFvrMCHjscEF7u3Y6PB7An3IzooBHchsFDei
724+
AAECIQD/JahddzR5K3A6rzTidmAf1PBtqi7296EnWv8WvpfAAQIhAOvowIXZI4Un
725+
DXjgZ9ekuUjZN+GUQRAVlkEEohGLVy59AiEA90VtqDdQuWWpvJX0cM08V10tLXrT
726+
TTGsEtITid1ogAECIQDAaFl90ZgS5cMrL3wCeatVKzVUmuJmB/VAmlLFFGzK0QIh
727+
ANJGc7AFk4fyFD/OezhwGHbWmo/S+bfeAiIh2Ss2FxKJ
728+
-----END CERTIFICATE-----
729+
-----BEGIN RSA PRIVATE KEY-----
730+
Proc-Type: 4,ENCRYPTED
731+
DEK-Info: AES-256-CBC,975C518B7D2CCD1164A3354D1F89C5A6
732+
733+
MIIBOgIBAAJBAMIeCnn9G/7g2Z6J+qHOE2XCLLuPoh5NHTO2Fm+PbzBvafBo0oYo
734+
QVVy7frzxmOqx6iIZBxTyfAQqBPO3Br59BMCAwEAAQJAX+PjHPuxdqiwF6blTkS0
735+
RFI1MrnzRbCmOkM6tgVO0cd6r5Z4bDGLusH9yjI9iI84gPRjK0AzymXFmBGuREHI
736+
sQIhAPKf4pp+Prvutgq2ayygleZChBr1DC4XnnufBNtaswyvAiEAzNGVKgNvzuhk
737+
ijoUXIDruJQEGFGvZTsi1D2RehXiT90CIQC4HOQUYKCydB7oWi1SHDokFW2yFyo6
738+
/+lf3fgNjPI6OQIgUPmTFXciXxT1msh3gFLf3qt2Kv8wbr9Ad9SXjULVpGkCIB+g
739+
RzHX0lkJl9Stshd/7Gbt65/QYq+v+xvAeT0CoyIg
740+
-----END RSA PRIVATE KEY-----
741+
";
742+
const HEADER_LF_DATA: [&'static str; 2] = [
743+
"MIIBPQIBAAJBAOsfi5AGYhdRs/x6q5H7kScxA0Kzzqe6WI6gf6+tc6IvKQJo5rQc
744+
dWWSQ0nRGt2hOPDO+35NKhQEjBQxPh/v7n0CAwEAAQJBAOGaBAyuw0ICyENy5NsO
745+
2gkT00AWTSzM9Zns0HedY31yEabkuFvrMCHjscEF7u3Y6PB7An3IzooBHchsFDei
746+
AAECIQD/JahddzR5K3A6rzTidmAf1PBtqi7296EnWv8WvpfAAQIhAOvowIXZI4Un
747+
DXjgZ9ekuUjZN+GUQRAVlkEEohGLVy59AiEA90VtqDdQuWWpvJX0cM08V10tLXrT
748+
TTGsEtITid1ogAECIQDAaFl90ZgS5cMrL3wCeatVKzVUmuJmB/VAmlLFFGzK0QIh
749+
ANJGc7AFk4fyFD/OezhwGHbWmo/S+bfeAiIh2Ss2FxKJ",
750+
"MIIBOgIBAAJBAMIeCnn9G/7g2Z6J+qHOE2XCLLuPoh5NHTO2Fm+PbzBvafBo0oYo
751+
QVVy7frzxmOqx6iIZBxTyfAQqBPO3Br59BMCAwEAAQJAX+PjHPuxdqiwF6blTkS0
752+
RFI1MrnzRbCmOkM6tgVO0cd6r5Z4bDGLusH9yjI9iI84gPRjK0AzymXFmBGuREHI
753+
sQIhAPKf4pp+Prvutgq2ayygleZChBr1DC4XnnufBNtaswyvAiEAzNGVKgNvzuhk
754+
ijoUXIDruJQEGFGvZTsi1D2RehXiT90CIQC4HOQUYKCydB7oWi1SHDokFW2yFyo6
755+
/+lf3fgNjPI6OQIgUPmTFXciXxT1msh3gFLf3qt2Kv8wbr9Ad9SXjULVpGkCIB+g
756+
RzHX0lkJl9Stshd/7Gbt65/QYq+v+xvAeT0CoyIg",
757+
];
758+
759+
/// Returns `true` when `left` and `right` hold exactly the same bytes.
///
/// Slice equality already compares lengths first and then the elements,
/// so no hand-written length check or element loop is needed.
fn cmp_data(left: &[u8], right: &[u8]) -> bool {
    left == right
}
768+
769+
/// Parses the CRLF fixture (one plain section, one section with headers)
/// and checks that each section's tag and decoded contents match the
/// expected base64 payload.
#[test]
fn test_parse_many_with_headers_crlf() {
    let pems = parse_many(HEADER_CRLF).unwrap();
    assert_eq!(pems.len(), 2);

    let expected_tags = ["CERTIFICATE", "RSA PRIVATE KEY"];
    for (idx, tag) in expected_tags.iter().enumerate() {
        assert_eq!(pems[idx].tag, *tag);
        // Decode the expected base64 with the same helper the parser uses.
        let expected = decode_data(HEADER_CRLF_DATA[idx]).unwrap();
        assert!(cmp_data(&pems[idx].contents, &expected));
    }
}
784+
785+
/// Parses the LF fixture (one plain section, one section with headers)
/// and checks that each section's tag and decoded contents match the
/// expected base64 payload.
#[test]
fn test_parse_many_with_headers_lf() {
    let pems = parse_many(HEADER_LF).unwrap();
    assert_eq!(pems.len(), 2);

    let expected_tags = ["CERTIFICATE", "RSA PRIVATE KEY"];
    for (idx, tag) in expected_tags.iter().enumerate() {
        assert_eq!(pems[idx].tag, *tag);
        // Decode the expected base64 with the same helper the parser uses.
        let expected = decode_data(HEADER_LF_DATA[idx]).unwrap();
        assert!(cmp_data(&pems[idx].contents, &expected));
    }
}
676800
}

src/parser.rs

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,33 @@ impl<'a> Iterator for CaptureMatches<'a> {
3333
}
3434
}
3535

36+
/// Locates the next `-----BEGIN <tag>-----` line.
///
/// Returns `(rest, tag)`: the input following the BEGIN line with leading
/// whitespace skipped, and the tag bytes found between `-----BEGIN ` and
/// the closing `-----`. Returns `None` if either marker is missing.
fn parse_begin<'a>(input: &'a [u8]) -> Option<(&'a [u8], &'a [u8])> {
    read_until(input, b"-----BEGIN ")
        // Whatever precedes the BEGIN marker is discarded.
        .and_then(|(after_marker, _)| read_until(after_marker, b"-----"))
        .map(|(after_tag, tag)| (skip_whitespace(after_tag), tag))
}
42+
43+
fn parse_payload<'a>(input: &'a [u8]) -> Option<(&'a [u8], &'a [u8])> {
44+
read_until(input, b"-----END ")
45+
}
46+
47+
fn extract_headers_and_data<'a>(input: &'a [u8]) -> (&'a [u8], &'a [u8]) {
48+
if let Some((rest, headers)) = read_until(input, b"\n\n") {
49+
(headers, rest)
50+
} else if let Some((rest, headers)) = read_until(input, b"\r\n\r\n") {
51+
(headers, rest)
52+
} else {
53+
(&[], input)
54+
}
55+
}
56+
57+
/// Reads the tag of an END line, up to its closing `-----`.
///
/// Expects `input` to start right after the `-----END ` marker. Returns
/// `(remaining, tag)`: the input after the closing dashes with leading
/// whitespace skipped, and the tag bytes. Returns `None` when the closing
/// `-----` is missing.
fn parse_end<'a>(input: &'a [u8]) -> Option<(&'a [u8], &'a [u8])> {
    read_until(input, b"-----").map(|(after_tag, tag)| (skip_whitespace(after_tag), tag))
}
62+
3663
fn parser_inner<'a>(input: &'a [u8]) -> Option<(&'a [u8], Captures<'a>)> {
3764
// Should be equivalent to the regex
3865
// "(?s)-----BEGIN (?P<begin>.*?)-----[ \t\n\r]*(?P<data>.*?)-----END (?P<end>.*?)-----[ \t\n\r]*"
@@ -42,12 +69,10 @@ fn parser_inner<'a>(input: &'a [u8]) -> Option<(&'a [u8], Captures<'a>)> {
4269
// (?P<data>.*?) # Parse data
4370
// -----END (?P<end>.*?)-----[ \t\n\r]* # Parse end
4471

45-
let (input, _) = read_until(input, b"-----BEGIN ")?;
46-
let (input, begin) = read_until(input, b"-----")?;
47-
let input = skip_whitespace(input);
48-
let (input, data) = read_until(input, b"-----END ")?;
49-
let (remaining, end) = read_until(input, b"-----")?;
50-
let remaining = skip_whitespace(remaining);
72+
let (input, begin) = parse_begin(input)?;
73+
let (input, payload) = parse_payload(input)?;
74+
let (_headers, data) = extract_headers_and_data(payload);
75+
let (remaining, end) = parse_end(input)?;
5176

5277
let captures = Captures { begin, data, end };
5378
Some((remaining, captures))

0 commit comments

Comments
 (0)