From 7a7ec178b9fdbe8641278608cb6fb16fe9913a2e Mon Sep 17 00:00:00 2001 From: bluss Date: Tue, 4 Oct 2016 14:40:33 +0200 Subject: [PATCH] Fix ArrayString to implement .push(char) faster Previously we used formatting, which is a virtual call and quite the detour. Now copy the utf-8 encoding code from Rust (thank you Alex Crichton) and use that. --- src/array_string.rs | 21 ++++++++++++++++-- src/char_ext.rs | 52 +++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + tests/tests.rs | 15 +++++++++++++ 4 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 src/char_ext.rs diff --git a/src/array_string.rs b/src/array_string.rs index ee1b0aa..d5bd904 100644 --- a/src/array_string.rs +++ b/src/array_string.rs @@ -10,6 +10,7 @@ use std::slice; use array::Array; use array::Index; use CapacityError; +use char_ext::encode_utf8; /// A string with a fixed capacity. /// @@ -108,8 +109,16 @@ impl> ArrayString { /// assert_eq!(overflow.unwrap_err().element(), 'c'); /// ``` pub fn push(&mut self, c: char) -> Result<(), CapacityError> { - use std::fmt::Write; - self.write_char(c).map_err(|_| CapacityError::new(c)) + let len = self.len(); + unsafe { + match encode_utf8(c, &mut self.raw_mut_bytes()[len..]) { + Ok(n) => { + self.set_len(len + n); + Ok(()) + } + Err(_) => Err(CapacityError::new(c)), + } + } } /// Adds the given string slice to the end of the string. @@ -169,6 +178,11 @@ impl> ArrayString { pub fn as_str(&self) -> &str { self } + + /// Return a mutable slice of the whole string's buffer + unsafe fn raw_mut_bytes(&mut self) -> &mut [u8] { + slice::from_raw_parts_mut(self.xs.as_mut_ptr(), self.capacity()) + } } impl> Deref for ArrayString { @@ -237,6 +251,9 @@ impl> fmt::Display for ArrayString { /// `Write` appends written data to the end of the string. 
impl> fmt::Write for ArrayString { + fn write_char(&mut self, c: char) -> fmt::Result { + self.push(c).map_err(|_| fmt::Error) + } fn write_str(&mut self, s: &str) -> fmt::Result { self.push_str(s).map_err(|_| fmt::Error) } diff --git a/src/char_ext.rs b/src/char_ext.rs new file mode 100644 index 0000000..74d816f --- /dev/null +++ b/src/char_ext.rs @@ -0,0 +1,52 @@ +// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +// +// Original authors: alexcrichton + + +// UTF-8 ranges and tags for encoding characters +const TAG_CONT: u8 = 0b1000_0000; +const TAG_TWO_B: u8 = 0b1100_0000; +const TAG_THREE_B: u8 = 0b1110_0000; +const TAG_FOUR_B: u8 = 0b1111_0000; +const MAX_ONE_B: u32 = 0x80; +const MAX_TWO_B: u32 = 0x800; +const MAX_THREE_B: u32 = 0x10000; + +/// Placeholder +pub struct EncodeError; + +/// Encode a char into buf +#[inline] +pub fn encode_utf8(ch: char, buf: &mut [u8]) -> Result +{ + let code = ch as u32; + if code < MAX_ONE_B && buf.len() >= 1 { + buf[0] = code as u8; + return Ok(1); + } else if code < MAX_TWO_B && buf.len() >= 2 { + buf[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; + buf[1] = (code & 0x3F) as u8 | TAG_CONT; + return Ok(2); + } else if code < MAX_THREE_B && buf.len() >= 3 { + buf[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; + buf[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT; + buf[2] = (code & 0x3F) as u8 | TAG_CONT; + return Ok(3); + } else if buf.len() >= 4 { + buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; + buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; + buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; + buf[3] = (code & 0x3F) as u8 | TAG_CONT; + return Ok(4); + }; + Err(EncodeError) +} + diff --git a/src/lib.rs b/src/lib.rs index 
390c691..d06798b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,6 +55,7 @@ use nodrop::NoDrop; mod array; mod array_string; +mod char_ext; pub use array::Array; pub use odds::IndexRange as RangeArgument; diff --git a/tests/tests.rs b/tests/tests.rs index 3dc204c..42ecc99 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -356,6 +356,21 @@ fn test_string_clone() { assert_eq!(&t, &s); } +#[test] +fn test_string_push() { + let text = "abcαβγ"; + let mut s = ArrayString::<[_; 8]>::new(); + for c in text.chars() { + if let Err(_) = s.push(c) { + break; + } + } + assert_eq!("abcαβ", &s[..]); + s.push('x').ok(); + assert_eq!("abcαβx", &s[..]); + assert!(s.push('x').is_err()); +} + #[test] fn test_insert_at_length() {