From 456aeaf55782d6f304df79c83cc6a9d6f5b4cc28 Mon Sep 17 00:00:00 2001 From: bluss Date: Thu, 26 Oct 2017 22:44:07 +0200 Subject: [PATCH 1/2] FEAT: Remove odds dependency in arrayvec Copy the encode_utf8 function from odds. std encode_utf8 requires Rust 1.15 and has a different signature, this one seems to fit us better. --- Cargo.toml | 4 ---- src/array_string.rs | 2 +- src/char.rs | 54 +++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 +- 4 files changed, 56 insertions(+), 6 deletions(-) create mode 100644 src/char.rs diff --git a/Cargo.toml b/Cargo.toml index 1873070..3362143 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,10 +11,6 @@ repository = "https://github.com/bluss/arrayvec" keywords = ["stack", "vector", "array", "data-structure", "no_std"] categories = ["data-structures", "no-std"] -[dependencies.odds] -version = "0.2.23" -default-features = false - [dependencies.nodrop] version = "0.1.8" path = "nodrop" diff --git a/src/array_string.rs b/src/array_string.rs index de63ee2..54f2d9e 100644 --- a/src/array_string.rs +++ b/src/array_string.rs @@ -12,7 +12,7 @@ use std::slice; use array::{Array, ArrayExt}; use array::Index; use CapacityError; -use odds::char::encode_utf8; +use char::encode_utf8; #[cfg(feature="serde-1")] use serde::{Serialize, Deserialize, Serializer, Deserializer}; diff --git a/src/char.rs b/src/char.rs new file mode 100644 index 0000000..8191dfb --- /dev/null +++ b/src/char.rs @@ -0,0 +1,54 @@ +// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms.
+// +// Original authors: alexcrichton, bluss + +// UTF-8 ranges and tags for encoding characters +const TAG_CONT: u8 = 0b1000_0000; +const TAG_TWO_B: u8 = 0b1100_0000; +const TAG_THREE_B: u8 = 0b1110_0000; +const TAG_FOUR_B: u8 = 0b1111_0000; +const MAX_ONE_B: u32 = 0x80; +const MAX_TWO_B: u32 = 0x800; +const MAX_THREE_B: u32 = 0x10000; + +/// Placeholder +pub struct EncodeUtf8Error; + +/// Encode a char into buf using UTF-8. +/// +/// On success, return the byte length of the encoding (1, 2, 3 or 4).
+/// On error, return `EncodeUtf8Error` if the buffer was too short for the char. +#[inline] +pub fn encode_utf8(ch: char, buf: &mut [u8]) -> Result<usize, EncodeUtf8Error> +{ + let code = ch as u32; + if code < MAX_ONE_B && buf.len() >= 1 { + buf[0] = code as u8; + return Ok(1); + } else if code < MAX_TWO_B && buf.len() >= 2 { + buf[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; + buf[1] = (code & 0x3F) as u8 | TAG_CONT; + return Ok(2); + } else if code < MAX_THREE_B && buf.len() >= 3 { + buf[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; + buf[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT; + buf[2] = (code & 0x3F) as u8 | TAG_CONT; + return Ok(3); + } else if buf.len() >= 4 { + buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; + buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; + buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; + buf[3] = (code & 0x3F) as u8 | TAG_CONT; + return Ok(4); + }; + Err(EncodeUtf8Error) +} + diff --git a/src/lib.rs b/src/lib.rs index bc9d97d..286dfd9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,7 +25,6 @@ //! #![doc(html_root_url="https://docs.rs/arrayvec/0.4/")] #![cfg_attr(not(feature="std"), no_std)] -extern crate odds; extern crate nodrop; #[cfg(feature="serde-1")] extern crate serde; @@ -62,6 +61,7 @@ use serde::{Serialize, Deserialize, Serializer, Deserializer}; mod array; mod array_string; +mod char; mod range; mod errors; From 4195f1a7415982d0fd8e010172e25a1eff9c2578 Mon Sep 17 00:00:00 2001 From: bluss Date: Fri, 27 Oct 2017 22:14:12 +0200 Subject: [PATCH 2/2] FEAT: Benchmarks for arraystring.try_push / push These benches inform encode_utf8 changes, if any.
--- Cargo.toml | 4 ++ benches/arraystring.rs | 90 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 benches/arraystring.rs diff --git a/Cargo.toml b/Cargo.toml index 3362143..683f771 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,10 @@ bencher = "0.1.4" name = "extend" harness = false +[[bench]] +name = "arraystring" +harness = false + [features] default = ["std"] std = [] diff --git a/benches/arraystring.rs b/benches/arraystring.rs new file mode 100644 index 0000000..9cff587 --- /dev/null +++ b/benches/arraystring.rs @@ -0,0 +1,90 @@ + +extern crate arrayvec; +#[macro_use] extern crate bencher; + +use arrayvec::ArrayString; + +use bencher::Bencher; + +fn try_push_c(b: &mut Bencher) { + let mut v = ArrayString::<[u8; 512]>::new(); + b.iter(|| { + v.clear(); + while v.try_push('c').is_ok() { + } + v.len() + }); + b.bytes = v.capacity() as u64; +} + +fn try_push_alpha(b: &mut Bencher) { + let mut v = ArrayString::<[u8; 512]>::new(); + b.iter(|| { + v.clear(); + while v.try_push('α').is_ok() { + } + v.len() + }); + b.bytes = v.capacity() as u64; +} + +// Yes, pushing a string char-by-char is slow. Use .push_str. 
+fn try_push_string(b: &mut Bencher) { + let mut v = ArrayString::<[u8; 512]>::new(); + let input = "abcαβγ“”"; + b.iter(|| { + v.clear(); + for ch in input.chars().cycle() { + if !v.try_push(ch).is_ok() { + break; + } + } + v.len() + }); + b.bytes = v.capacity() as u64; +} + +fn push_c(b: &mut Bencher) { + let mut v = ArrayString::<[u8; 512]>::new(); + b.iter(|| { + v.clear(); + while !v.is_full() { + v.push('c'); + } + v.len() + }); + b.bytes = v.capacity() as u64; +} + +fn push_alpha(b: &mut Bencher) { + let mut v = ArrayString::<[u8; 512]>::new(); + b.iter(|| { + v.clear(); + while !v.is_full() { + v.push('α'); + } + v.len() + }); + b.bytes = v.capacity() as u64; +} + +fn push_string(b: &mut Bencher) { + let mut v = ArrayString::<[u8; 512]>::new(); + let input = "abcαβγ“”"; + b.iter(|| { + v.clear(); + for ch in input.chars().cycle() { + if !v.is_full() { + v.push(ch); + } else { + break; + } + } + v.len() + }); + b.bytes = v.capacity() as u64; +} + +benchmark_group!(benches, try_push_c, try_push_alpha, try_push_string, push_c, + push_alpha, push_string); +benchmark_main!(benches);