diff --git a/subject_source/src/lib.rs b/subject_source/src/lib.rs index 9eb255a..6aba7bd 100644 --- a/subject_source/src/lib.rs +++ b/subject_source/src/lib.rs @@ -1,2 +1,3 @@ pub mod errors; pub mod vec; +pub mod string; diff --git a/subject_source/src/string.rs b/subject_source/src/string.rs new file mode 100644 index 0000000..12c51f5 --- /dev/null +++ b/subject_source/src/string.rs @@ -0,0 +1,2 @@ +pub mod utf8; +pub mod parse; diff --git a/subject_source/src/string/parse.rs b/subject_source/src/string/parse.rs new file mode 100644 index 0000000..9a8131a --- /dev/null +++ b/subject_source/src/string/parse.rs @@ -0,0 +1,26 @@ +use crate::vec::compute::*; +use std::str::FromStr; + +#[derive(PartialEq, Debug)] +pub enum ParseOperationError { + Number(std::num::ParseFloatError), + UnknownOperation, +} + +impl FromStr for Operation { + type Err = ParseOperationError; + + fn from_str(s: &str) -> Result { + match s { + "+" => Ok(Operation::Binary(Binary::Add)), + "-" => Ok(Operation::Binary(Binary::Sub)), + "*" => Ok(Operation::Binary(Binary::Mul)), + "/" => Ok(Operation::Binary(Binary::Div)), + s if s.starts_with(|c| "0123456789.-".contains(c)) => { + let num = s.parse().map_err(ParseOperationError::Number)?; + Ok(Operation::Push(num)) + } + _ => Err(ParseOperationError::UnknownOperation), + } + } +} diff --git a/subject_source/src/string/utf8.rs b/subject_source/src/string/utf8.rs new file mode 100644 index 0000000..769ba73 --- /dev/null +++ b/subject_source/src/string/utf8.rs @@ -0,0 +1,4 @@ +/// Returns the char at the asked position (if not out of bound) +pub fn char_at(s: &str, n: usize) -> Option { + s.chars().nth(n) +} diff --git a/subject_source/src/vec/compute.rs b/subject_source/src/vec/compute.rs index 01cd4e3..c117f97 100644 --- a/subject_source/src/vec/compute.rs +++ b/subject_source/src/vec/compute.rs @@ -1,8 +1,10 @@ +#[derive(PartialEq, Debug)] pub enum Operation { Push(f32), Binary(Binary), } +#[derive(PartialEq, Debug)] pub enum Binary { Add, Sub, 
diff --git a/subject_source/tests/string_parse.rs b/subject_source/tests/string_parse.rs new file mode 100644 index 0000000..2c9fe57 --- /dev/null +++ b/subject_source/tests/string_parse.rs @@ -0,0 +1,48 @@ +use subject_source::string::parse::*; +use subject_source::vec::compute::*; + +#[test] +pub fn parse_add() { + assert_eq!("+".parse(), Ok(Operation::Binary(Binary::Add))); +} + +#[test] +pub fn parse_sub() { + assert_eq!("-".parse(), Ok(Operation::Binary(Binary::Sub))); +} + +#[test] +pub fn parse_mul() { + assert_eq!("*".parse(), Ok(Operation::Binary(Binary::Mul))); +} + +#[test] +pub fn parse_div() { + assert_eq!("/".parse(), Ok(Operation::Binary(Binary::Div))); +} + +#[test] +pub fn parse_number() { + assert_eq!("3.14".parse(), Ok(Operation::Push(3.14))); +} + +#[test] +pub fn parse_number_negative() { + assert_eq!("-3.14".parse(), Ok(Operation::Push(-3.14))); +} + +#[test] +pub fn parse_bad_number() { + match "3.14aaa".parse::() { + Err(ParseOperationError::Number(_)) => {} + _ => panic!("'3.14aaa' should raise a ParseOperationError::Number error"), + } +} + +#[test] +pub fn parse_bad_operation() { + match "blabla".parse::() { + Err(ParseOperationError::UnknownOperation) => {} + _ => panic!("'blabla' should raise a ParseOperationError::UnknownOperation error"), + } +} diff --git a/subject_source/tests/string_utf8.rs b/subject_source/tests/string_utf8.rs new file mode 100644 index 0000000..4a3d545 --- /dev/null +++ b/subject_source/tests/string_utf8.rs @@ -0,0 +1,21 @@ +use subject_source::string::utf8::char_at; + +#[test] +pub fn char_at_empty() { + assert_eq!(char_at("", 0), None); +} + +#[test] +pub fn char_at_emoji() { + assert_eq!(char_at("🙂🙁", 1), Some('🙁')); +} + +#[test] +pub fn char_at_oob() { + assert_eq!(char_at("abc", 3), None); +} + +#[test] +pub fn char_at_ascii() { + assert_eq!(char_at("abc", 1), Some('b')); +} diff --git a/subject_text/index.md b/subject_text/index.md index 0fe0647..0c80c7f 100644 --- a/subject_text/index.md +++ 
b/subject_text/index.md @@ -1,6 +1,6 @@ --- revision = "0.3.0" -parts = ["errors", "vec"] +parts = ["errors", "vec", "string"] --- When it comes to programming, it's all fun and games until the real world comes in and sends weird unexpected inputs to your little protege. So you better handle those cases as best as you can. There are 3 main ways of handling errors. diff --git a/subject_text/string/index.md b/subject_text/string/index.md new file mode 100644 index 0000000..876dceb --- /dev/null +++ b/subject_text/string/index.md @@ -0,0 +1,5 @@ +--- +name = "Strings and parsing" +difficulty = 5 +exercises = ["utf8.md", "parse.md"] +--- diff --git a/subject_text/string/parse.md b/subject_text/string/parse.md new file mode 100644 index 0000000..db133d1 --- /dev/null +++ b/subject_text/string/parse.md @@ -0,0 +1,50 @@ +--- +name = "Parse" +file = "src/string/parse.rs" +--- + +Parsing is the act of extracting structured information from a string of symbols. In programming we generally parse raw bytes, or even sometimes strings of bits. In Rust, we have a very rudimentary parsing utility, allowing users to parse most simple standard types from `&str`. This is encoded in the [`FromStr`](https://doc.rust-lang.org/std/str/trait.FromStr.html) trait. + +Let's parse simple structured data. Remember the type we introduced in the Forth exercise? + +```rust +pub enum Operation { + Push(f32), + Binary(Binary), +} + +pub enum Binary { + Add, + Sub, + Mul, + Div, +} +``` + +Let's implement `FromStr` for the `Binary` and `Operation` types. Any number should create a `Push`, and we will map `"+"` to `Add`, `"-"` to `Sub`, `"*"` to `Mul`, and `"/"` to `Div`. 
+ +```prototype +use crate::vec::forth; +use std::str::FromStr; + +#[derive(PartialEq, Debug)] +pub enum ParseOperationError { + Number(std::num::ParseFloatError), + UnknownOperation, +} + +impl FromStr for Operation { + type Err = ParseOperationError; + + fn from_str(s: &str) -> Result { + unimplemented!() + } +} +``` + +```example +fn main() { + assert_eq!("123".parse(), Ok(Operation::Push(123))); + assert_eq!("+".parse(), Ok(Operation::Binary(Binary::Add))); + assert_eq!("foo".parse(), Err(ParseOperationError::UnknownOperation)); +} diff --git a/subject_text/string/utf8.md b/subject_text/string/utf8.md new file mode 100644 index 0000000..7c0f190 --- /dev/null +++ b/subject_text/string/utf8.md @@ -0,0 +1,37 @@ +--- +name = "UTF-8" +file = "src/string/utf8.rs" +--- + + +We will focus on the `String` type and its borrowed variant `&str`. These are UTF-8 strings, and to enforce this, all functions that create strings can only either give valid UTF-8 strings, or fail (with the error types we encountered before). + +> # deepening +> A word about UTF-8: +> It is a string format where characters (or "codepoints") are encoded using a variable number of bytes. +> ASCII characters are a subset of UTF-8, and thus are all encoded in 1 byte, but other characters +> can take 2, 3, or 4 (maximum) bytes. Because of this, random access is difficult, because you cannot compute +> the position in memory of the Nth codepoint without iterating through the string from the start. +> +> This is why Rust cannot make `char` accessible with direct indexing (`[]` operator), but allows iterating over `char`s. + +Let's implement a function accessing the Nth char of a string: + +> # note +> You can use [`str::chars`](https://doc.rust-lang.org/std/primitive.str.html#method.chars) to iterate over +> a string's chars. If you want a challenge, you can also read the [UTF-8 spec](https://fr.wikipedia.org/wiki/UTF-8) and iterate over single bytes of the string. 
+ +```prototype +/// Returns the char at the asked position (if not out of bound) +pub fn char_at(s: &str, n: usize) -> Option { + unimplemented!() +} +``` + +```example +fn main() { + assert_eq!(char_at("abcdef", 2), Some('c')); + assert_eq!(char_at("", 1), None); + assert_eq!(char_at("🧐", 0), Some('🧐')); +} +``` diff --git a/subject_text/tree b/subject_text/tree index 94956a0..fe62bf7 100644 --- a/subject_text/tree +++ b/subject_text/tree @@ -8,5 +8,8 @@    ├── vec    │   ├── access.rs    │   └── compute.rs +    ├── string +    │   ├── utf8.rs +    │   └── parse.rs    ├── vec.rs    └── lib.rs