diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..ddda1b3 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,51 @@ +name: CI + +on: + push: + branches: [auto] + pull_request: + workflow_dispatch: + +jobs: + linux-ci: + name: Linux + runs-on: ubuntu-latest + strategy: + matrix: + toolchain: ["stable", "beta", "nightly", "1.36.0"] + steps: + - uses: actions/checkout@v2 + + - name: Install toolchain + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: ${{ matrix.toolchain }} + override: true + + - name: Cargo build + run: cargo build + + - name: Cargo doc + run: cargo doc + + - name: Cargo test + run: cargo test --features 'encoding encoding_rs' + + - name: Cargo bench + if: matrix.toolchain == 'nightly' + run: cargo test --features bench + + build_result: + name: homu build finished + runs-on: ubuntu-latest + needs: + - "linux-ci" + + steps: + - name: Mark the job as successful + run: exit 0 + if: success() + - name: Mark the job as unsuccessful + run: exit 1 + if: "!success()" diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index de423f4..0000000 --- a/.travis.yml +++ /dev/null @@ -1,13 +0,0 @@ -language: rust -rust: - - nightly - - beta - - stable -script: - - cargo build - - cargo doc - - "cargo test --features 'encoding encoding_rs'" - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features bench; fi" - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then (cd capi/ctest; ./build-and-test.sh); fi" -notifications: - webhooks: http://build.servo.org:54856/travis diff --git a/Cargo.toml b/Cargo.toml index 02dcfa2..ed7740f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tendril" -version = "0.4.1" +version = "0.4.3" authors = ["Keegan McAllister ", "Simon Sapin ", "Chris Morgan "] @@ -13,7 +13,7 @@ description = "Compact buffer/string type for zero-copy parsing" mac = "0.1" encoding = {version = "0.2", optional = true} encoding_rs = {version = "0.8.12", optional = true} -futf = "0.1.1" +futf = "0.1.5" utf-8 = "0.7" [dev-dependencies] diff --git a/README.md b/README.md index 101497e..fced4b7 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,9 @@ **Warning**: This library is at a very early stage of development, and it contains a substantial amount of `unsafe` code. Use at your own risk! -[![Build Status](https://travis-ci.org/servo/tendril.svg?branch=master)](https://travis-ci.org/servo/tendril) +[![Build Status](https://github.com/servo/tendril/workflows/CI/badge.svg)](https://github.com/servo/tendril/actions) -[API Documentation](http://doc.servo.org/tendril/index.html) +[API Documentation](https://doc.servo.org/tendril/index.html) ## Introduction @@ -31,7 +31,8 @@ to go over the limit. ## Formats and encoding -`Tendril` uses [phantom types](http://rustbyexample.com/generics/phantom.html) +`Tendril` uses +[phantom types](https://doc.rust-lang.org/stable/rust-by-example/generics/phantom.html) to track a buffer's format. This determines at compile time which operations are available on a given tendril. For example, `Tendril` and `Tendril` can be borrowed as `&str` and `&[u8]` respectively. @@ -40,26 +41,6 @@ operations are available on a given tendril. For example, `Tendril` and [rust-encoding](https://github.com/lifthrasiir/rust-encoding) and has preliminary support for [WTF-8][] buffers. -## C interface - -`Tendril` provides a C API, which allows Rust to efficiently exchange buffers -with C or any other language. - -```c -#include "tendril.h" - -int main() { - tendril t = TENDRIL_INIT; - tendril_sprintf(&t, "Hello, %d!\n", 2015); - tendril_fwrite(&t, stdout); - some_rust_library(t); // transfer ownership - return 0; -} -``` - -See the [API documentation](https://github.com/kmcallister/tendril/blob/master/capi/include/tendril.h#L18) -and the [test program](https://github.com/kmcallister/tendril/blob/master/capi/ctest/test.c). - ## Plans for the future ### Ropes @@ -107,9 +88,9 @@ metadata is chosen by the API consumer; it defaults to `()`, which has size zero. For any non-inline string, we can provide the associated metadata as well as a byte offset. -[NonZero]: http://doc.rust-lang.org/core/nonzero/struct.NonZero.html +[NonZero]: https://doc.rust-lang.org/core/nonzero/struct.NonZero.html [html5ever]: https://github.com/servo/html5ever -[WTF-8]: http://simonsapin.github.io/wtf-8/ -[rope]: http://en.wikipedia.org/wiki/Rope_%28data_structure%29 -[persistent data structure]: http://en.wikipedia.org/wiki/Persistent_data_structure -[2-3 finger tree]: http://staff.city.ac.uk/~ross/papers/FingerTree.html +[WTF-8]: https://simonsapin.github.io/wtf-8/ +[rope]: https://en.wikipedia.org/wiki/Rope_%28data_structure%29 +[persistent data structure]: https://en.wikipedia.org/wiki/Persistent_data_structure +[2-3 finger tree]: https://www.staff.city.ac.uk/~ross/papers/FingerTree.html diff --git a/capi/Cargo.toml b/capi/Cargo.toml deleted file mode 100644 index 7223ca0..0000000 --- a/capi/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] - -name = "tendril_capi" -version = "0.0.1" -authors = ["Keegan McAllister "] -repository = "https://github.com/kmcallister/tendril" -description = "C API for tendril" -build = "build.rs" - -[lib] -name = "tendril_capi" -crate-type = ["staticlib"] - -[dependencies] -libc = "0.1" - -[dependencies.tendril] -path = "../" - -[build-dependencies] -gcc = "0" diff --git a/capi/build.rs b/capi/build.rs deleted file mode 100644 index a740c8a..0000000 --- a/capi/build.rs +++ /dev/null @@ -1,12 +0,0 @@ -#![deny(warnings)] -#![allow(deprecated)] - -extern crate gcc; - -fn main() { - gcc::Config::new() - .file("src/glue.c") - .flag("-O3").flag("-fPIC") - .include("include") - .compile("libtendril_cglue.a"); -} diff --git a/capi/ctest/.gitignore b/capi/ctest/.gitignore deleted file mode 100644 index e08d574..0000000 --- a/capi/ctest/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -libtendril_capi.a -out.actual -test diff --git a/capi/ctest/build-and-test.sh b/capi/ctest/build-and-test.sh deleted file mode 100755 index 36542a1..0000000 --- a/capi/ctest/build-and-test.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh - -set -xe - -(cd ..; cargo build) -gcc -o test test.c -Wall -I ../include -L ../target/debug -ltendril_capi -ldl -lpthread -lrt -lgcc_s -lpthread -lc -lm -./test > out.actual -diff -u out.expect out.actual diff --git a/capi/ctest/out.expect b/capi/ctest/out.expect deleted file mode 100644 index a7b7288..0000000 --- a/capi/ctest/out.expect +++ /dev/null @@ -1,6 +0,0 @@ -Hello, 2015! -Tendril(shared: [72, 101, 108, 108, 111, 44, 32, 50, 48, 49, 53, 33, 10]) -Tendril(inline: [72, 101, 108, 108, 111]) -Tendril(shared: [72, 101, 108, 108, 111, 44, 32, 50, 48, 49, 53, 33, 10]) -HelloHello, 2015! -Appending diff --git a/capi/ctest/test.c b/capi/ctest/test.c deleted file mode 100644 index 1431484..0000000 --- a/capi/ctest/test.c +++ /dev/null @@ -1,28 +0,0 @@ -#include - -#include "tendril.h" - -int main() { - tendril t = TENDRIL_INIT; - tendril_sprintf(&t, "Hello, %d!\n", 2015); - tendril_fwrite(&t, stdout); - - tendril_debug_dump(&t, stdout); - puts(""); - - tendril s = TENDRIL_INIT; - tendril_sub(&s, &t, 0, 9); - tendril_pop_back(&s, 4); - tendril_debug_dump(&s, stdout); - puts(""); - tendril_debug_dump(&t, stdout); - puts(""); - - tendril_sprintf(&t, "Appending\n"); - tendril_fwrite(&s, stdout); - tendril_fwrite(&t, stdout); - - tendril_destroy(&s); - tendril_destroy(&t); - return 0; -} diff --git a/capi/include/tendril.h b/capi/include/tendril.h deleted file mode 100644 index 8ee0d70..0000000 --- a/capi/include/tendril.h +++ /dev/null @@ -1,143 +0,0 @@ -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -#ifndef _TENDRIL_H -#define _TENDRIL_H - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -// C equivalent of `ByteTendril`. -// -// See https://kmcallister.github.io/docs/tendril/tendril/struct.Tendril.html -// -// This is a small structure, probably 12 or 16 bytes. You can allocate it -// anywhere you like, but you *must* initialize it with `TENDRIL_INIT`. -// -// Functions that replace the content of the tendril will take care of -// deallocating any storage that becomes unused. The closest to an explicit -// "free" is tendril_destroy, but this leaves the target in a valid state: -// an empty tendril, which does not own a heap allocation. -// -// This API does not pass `tendril` by value at any point. If your code does -// this, you should interpret it as a transfer of ownership, and refrain from -// using the source value afterwards. See also `tendril_clone`. -// -// *Warning*: It is not safe to send or share tendrils between threads! -typedef struct tendril_impl tendril; - -// Initializer expression for a tendril. -// -// It is *never* safe to pass an uninitialized tendril to one of these functions. -#define TENDRIL_INIT { 0xF, 0, 0 } - -// Get a pointer to the data in a tendril. -static inline char *tendril_data(const tendril *t); - -// Get the number of bytes stored in a tendril. -static inline uint32_t tendril_len(const tendril *t); - -// Replace `t` with a copy of `r`. -// -// This will share the backing storage when practical. -void tendril_clone(tendril *t, const tendril *r); - -// Replace `t` with a slice of `r`. -// -// This will share the backing storage when practical. -void tendril_sub(tendril *t, const tendril *r, uint32_t offset, uint32_t length); - -// Deallocate any storage associated with the tendril, and replace it with -// an empty tendril (which does not own a heap allocation). -void tendril_destroy(tendril *t); - -// Truncate to length 0 *without* discarding any owned storage. -void tendril_clear(tendril *t); - -// Push some bytes onto the back of the tendril. -void tendril_push_buffer(tendril *t, const char *buffer, uint32_t length); - -// Push another tendril onto the back. -void tendril_push_tendril(tendril *t, const tendril *r); - -// Push "uninitialized bytes" onto the back. -// -// Really, this grows the tendril without writing anything to the new area. -void tendril_push_uninit(tendril *t, uint32_t n); - -// Remove bytes from the front. -void tendril_pop_front(tendril *t, uint32_t n); - -// Remove bytes from the back. -void tendril_pop_back(tendril *t, uint32_t n); - -// Replace `desc` with a tendril that describes (in ASCII text) the tendril -// `t`, including some details of how it is stored. -void tendril_debug_describe(tendril *desc, const tendril *t); - -// Push text onto the back of a tendril according to a format string. -// -// This does *not* push a NULL terminator. -int tendril_sprintf(tendril *t, const char *format, ...); - -// See tendril_sprintf. -int tendril_vsprintf(tendril *t, const char *format, va_list ap); - -// Write the bytes of the tendril to a stdio stream. -size_t tendril_fwrite(const tendril *t, FILE *stream); - -// Write a description in ASCII text of the tendril `t`, including some -// details of how it is stored. -void tendril_debug_dump(const tendril *t, FILE *stream); - -//// -//// implementation details follow -//// - -struct tendril_impl { - uintptr_t __ptr; - uint32_t __a; - uint32_t __b; -}; - -#define __TENDRIL_EMPTY_TAG 0xF -#define __TENDRIL_MAX_INLINE_TAG 0xF -#define __TENDRIL_HEADER_LEN (sizeof(char *) + 4) - -static inline char *tendril_data(const tendril *t) { - uintptr_t p = t->__ptr; - if (p <= __TENDRIL_MAX_INLINE_TAG) { - return (char *) &t->__a; - } else { - return (char *) ((p & ~1) + __TENDRIL_HEADER_LEN); - } -} - -static inline uint32_t tendril_len(const tendril *t) { - uintptr_t p = t->__ptr; - if (p == __TENDRIL_EMPTY_TAG) { - return 0; - } else if (p <= __TENDRIL_MAX_INLINE_TAG) { - return p; - } else { - return t->__a; - } -} - -#undef __TENDRIL_EMPTY_TAG -#undef __TENDRIL_MAX_INLINE_TAG -#undef __TENDRIL_HEADER_LEN - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/capi/src/glue.c b/capi/src/glue.c deleted file mode 100644 index eb5f695..0000000 --- a/capi/src/glue.c +++ /dev/null @@ -1,55 +0,0 @@ -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -#include -#include -#include - -#include "tendril.h" - -int tendril_vsprintf(tendril *t, const char *format, va_list args) { - // This is a lot like asprintf. - va_list args_copy; - va_copy(args_copy, args); - - int ret = vsnprintf(NULL, 0, format, args); - - if (ret > 0xFFFFFFFF) { - errno = E2BIG; - ret = -1; - } else if (ret >= 0) { - uint32_t addnl = ret + 1; // include null terminator - uint32_t old_len = tendril_len(t); - tendril_push_uninit(t, addnl); - - ret = vsnprintf(tendril_data(t) + old_len, addnl, format, args_copy); - - // Pop the NULL terminator. - tendril_pop_back(t, 1); - } - - va_end(args_copy); - return ret; -} - -int tendril_sprintf(tendril *t, const char *format, ...) { - va_list args; - va_start(args, format); - int ret = tendril_vsprintf(t, format, args); - va_end(args); - return ret; -} - -void tendril_debug_dump(const tendril *t, FILE *stream) { - tendril dbg = TENDRIL_INIT; - tendril_debug_describe(&dbg, t); - tendril_fwrite(&dbg, stream); - tendril_destroy(&dbg); -} - -size_t tendril_fwrite(const tendril *t, FILE *stream) { - return fwrite(tendril_data(t), 1, tendril_len(t), stream); -} diff --git a/capi/src/lib.rs b/capi/src/lib.rs deleted file mode 100644 index 96fd329..0000000 --- a/capi/src/lib.rs +++ /dev/null @@ -1,71 +0,0 @@ -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -#![warn(warnings)] - -extern crate libc; -extern crate tendril; - -use tendril::{ByteTendril, StrTendril}; -use std::slice; - -// Link the C glue code -#[link_name="tendril_cglue"] -extern "C" { } - -#[no_mangle] pub unsafe extern "C" -fn tendril_clone(t: *mut ByteTendril, r: *const ByteTendril) { - *t = (*r).clone(); -} - -#[no_mangle] pub unsafe extern "C" -fn tendril_sub(t: *mut ByteTendril, - r: *const ByteTendril, - offset: u32, - length: u32) { - *t = (*r).subtendril(offset, length); -} - -#[no_mangle] pub unsafe extern "C" -fn tendril_destroy(t: *mut ByteTendril) { - *t = ByteTendril::new(); -} - -#[no_mangle] pub unsafe extern "C" -fn tendril_clear(t: *mut ByteTendril) { - (*t).clear(); -} - -#[no_mangle] pub unsafe extern "C" -fn tendril_push_buffer(t: *mut ByteTendril, buffer: *const u8, length: u32) { - (*t).push_slice(slice::from_raw_parts(buffer, length as usize)); -} - -#[no_mangle] pub unsafe extern "C" -fn tendril_push_tendril(t: *mut ByteTendril, r: *const ByteTendril) { - (*t).push_tendril(&*r); -} - -#[no_mangle] pub unsafe extern "C" -fn tendril_push_uninit(t: *mut ByteTendril, n: u32) { - (*t).push_uninitialized(n); -} - -#[no_mangle] pub unsafe extern "C" -fn tendril_pop_front(t: *mut ByteTendril, n: u32) { - (*t).pop_front(n); -} - -#[no_mangle] pub unsafe extern "C" -fn tendril_pop_back(t: *mut ByteTendril, n: u32) { - (*t).pop_back(n); -} - -#[no_mangle] pub unsafe extern "C" -fn tendril_debug_describe(desc: *mut ByteTendril, t: *const ByteTendril) { - use std::fmt::Write; - let _ = write!(&mut *(desc as *mut StrTendril), "{:?}", *t); -} diff --git a/examples/fuzz.rs b/examples/fuzz.rs index 6d77f67..37daf56 100644 --- a/examples/fuzz.rs +++ b/examples/fuzz.rs @@ -1,6 +1,6 @@ // Licensed under the Apache License, Version 2.0 or the MIT license -// , at your +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// , at your // option. This file may not be copied, modified, or distributed // except according to those terms. @@ -13,8 +13,8 @@ extern crate tendril; use std::borrow::ToOwned; -use rand::Rng; use rand::distributions::{IndependentSample, Range}; +use rand::Rng; use tendril::StrTendril; fn fuzz() { @@ -25,7 +25,7 @@ fn fuzz() { let mut string_slices = vec![]; let mut tendril_slices = vec![]; - for _ in 1 .. 100_000 { + for _ in 1..100_000 { if buf_string.len() > (1 << 30) { buf_string.truncate(0); buf_tendril.clear(); @@ -33,7 +33,7 @@ fn fuzz() { let dist_action = Range::new(0, 100); match dist_action.ind_sample(&mut rng) { - 0...15 => { + 0..=15 => { let (start, end) = random_slice(&mut rng, TEXT); let snip = &TEXT[start..end]; buf_string.push_str(snip); @@ -41,7 +41,7 @@ fn fuzz() { assert_eq!(&*buf_string, &*buf_tendril); } - 16...31 => { + 16..=31 => { let (start, end) = random_slice(&mut rng, &buf_string); let snip = &buf_string[start..end].to_owned(); buf_string.push_str(&snip); @@ -49,21 +49,21 @@ fn fuzz() { assert_eq!(&*buf_string, &*buf_tendril); } - 32...47 => { + 32..=47 => { let lenstr = format!("[length = {}]", buf_tendril.len()); buf_string.push_str(&lenstr); buf_tendril.push_slice(&lenstr); assert_eq!(&*buf_string, &*buf_tendril); } - 48...63 => { + 48..=63 => { let n = random_boundary(&mut rng, &buf_string); buf_tendril.pop_front(n as u32); buf_string = buf_string[n..].to_owned(); assert_eq!(&*buf_string, &*buf_tendril); } - 64...79 => { + 64..=79 => { let new_len = random_boundary(&mut rng, &buf_string); let n = buf_string.len() - new_len; buf_string.truncate(new_len); @@ -71,14 +71,14 @@ fn fuzz() { assert_eq!(&*buf_string, &*buf_tendril); } - 80...90 => { + 80..=90 => { let (start, end) = random_slice(&mut rng, &buf_string); buf_string = buf_string[start..end].to_owned(); buf_tendril = buf_tendril.subtendril(start as u32, (end - start) as u32); assert_eq!(&*buf_string, &*buf_tendril); } - 91...96 => { + 91..=96 => { let c = rng.gen(); buf_string.push(c); assert!(buf_tendril.try_push_char(c).is_ok()); @@ -96,7 +96,10 @@ fn fuzz() { string_slices.push(buf_string[start..end].to_owned()); tendril_slices.push(buf_tendril.subtendril(start as u32, (end - start) as u32)); assert_eq!(string_slices.len(), tendril_slices.len()); - assert!(string_slices.iter().zip(tendril_slices.iter()).all(|(s,t)| **s == **t)); + assert!(string_slices + .iter() + .zip(tendril_slices.iter()) + .all(|(s, t)| **s == **t)); } } } @@ -104,7 +107,7 @@ fn fuzz() { fn random_boundary(rng: &mut R, text: &str) -> usize { loop { - let i = Range::new(0, text.len()+1).ind_sample(rng); + let i = Range::new(0, text.len() + 1).ind_sample(rng); if text.is_char_boundary(i) { return i; } @@ -113,8 +116,8 @@ fn random_boundary(rng: &mut R, text: &str) -> usize { fn random_slice(rng: &mut R, text: &str) -> (usize, usize) { loop { - let start = Range::new(0, text.len()+1).ind_sample(rng); - let end = Range::new(start, text.len()+1).ind_sample(rng); + let start = Range::new(0, text.len() + 1).ind_sample(rng); + let end = Range::new(start, text.len() + 1).ind_sample(rng); if !text.is_char_boundary(start) { continue; } diff --git a/src/bench.rs b/src/bench.rs index 9213d4a..a9d2c30 100644 --- a/src/bench.rs +++ b/src/bench.rs @@ -1,11 +1,11 @@ // Licensed under the Apache License, Version 2.0 or the MIT license -// , at your +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -use std::collections::hash_map::{HashMap, Entry}; use std::borrow::ToOwned; +use std::collections::hash_map::{Entry, HashMap}; use tendril::StrTendril; @@ -21,7 +21,9 @@ fn index_words_string(input: &String) -> HashMap> { let x: &mut Vec = e.get_mut(); x.push(word); } - Entry::Vacant(e) => { e.insert(vec![word]); } + Entry::Vacant(e) => { + e.insert(vec![word]); + } } } index @@ -35,18 +37,21 @@ fn index_words_tendril(input: &StrTendril) -> HashMap> { None => return index, Some((_, false)) => (), Some((word, true)) => match index.entry(word.chars().next().unwrap()) { - Entry::Occupied(mut e) => { e.get_mut().push(word); } - Entry::Vacant(e) => { e.insert(vec![word]); } - } + Entry::Occupied(mut e) => { + e.get_mut().push(word); + } + Entry::Vacant(e) => { + e.insert(vec![word]); + } + }, } } } -static EN_1: &'static str - = "Days turn to nights turn to paper into rocks into plastic"; +static EN_1: &'static str = "Days turn to nights turn to paper into rocks into plastic"; -static EN_2: &'static str - = "Here the notes in my laboratory journal cease. I was able to write the last \ +static EN_2: &'static str = + "Here the notes in my laboratory journal cease. I was able to write the last \ words only with great effort. By now it was already clear to me that LSD had \ been the cause of the remarkable experience of the previous Friday, for the \ altered perceptions were of the same type as before, only much more intense. I \ @@ -62,13 +67,13 @@ static EN_2: &'static str In spite of my delirious, bewildered condition, I had brief periods of clear \ and effective thinking—and chose milk as a nonspecific antidote for poisoning."; -static KR_1: &'static str - = "러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, 메모리-안전하고 병렬 \ +static KR_1: &'static str = + "러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, 메모리-안전하고 병렬 \ 프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. 아직 \ 개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다."; -static HTML_KR_1: &'static str - = "

러스트(Rust)는 모질라(러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, \ 메모리-안전하고 병렬 프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. \ 아직 개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.

"; @@ -87,9 +92,7 @@ mod index_words { while s.len() < SMALL_SIZE { s.push_str(::tendril::bench::$txt); } - b.iter(|| { - ::tendril::bench::index_words_string(&s) - }); + b.iter(|| ::tendril::bench::index_words_string(&s)); } #[bench] @@ -98,9 +101,7 @@ mod index_words { while t.len() < SMALL_SIZE { t.push_slice(::tendril::bench::$txt); } - b.iter(|| { - ::tendril::bench::index_words_tendril(&t) - }); + b.iter(|| ::tendril::bench::index_words_tendril(&t)); } #[bench] @@ -109,9 +110,7 @@ mod index_words { while s.len() < LARGE_SIZE { s.push_str(::tendril::bench::$txt); } - b.iter(|| { - ::tendril::bench::index_words_string(&s) - }); + b.iter(|| ::tendril::bench::index_words_string(&s)); } #[bench] @@ -120,16 +119,14 @@ mod index_words { while t.len() < LARGE_SIZE { t.push_slice(::tendril::bench::$txt); } - b.iter(|| { - ::tendril::bench::index_words_tendril(&t) - }); + b.iter(|| ::tendril::bench::index_words_tendril(&t)); } #[test] fn correctness() { use std::borrow::ToOwned; - use tendril::SliceExt; use tendril::bench::{index_words_string, index_words_tendril}; + use tendril::SliceExt; let txt = ::tendril::bench::$txt; let input_string = txt.to_owned(); @@ -152,7 +149,7 @@ mod index_words { } } } - } + }; } bench!(EN_1); diff --git a/src/buf32.rs b/src/buf32.rs index 3b1f911..d60a277 100644 --- a/src/buf32.rs +++ b/src/buf32.rs @@ -1,12 +1,12 @@ // Licensed under the Apache License, Version 2.0 or the MIT license -// , at your +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Provides an unsafe owned buffer type, used in implementing `Tendril`. -use std::{mem, ptr, u32, slice}; +use std::{mem, ptr, slice, u32}; use OFLOW; @@ -27,7 +27,7 @@ fn bytes_to_vec_capacity(x: u32) -> usize { let header = mem::size_of::(); debug_assert!(header > 0); let x = (x as usize).checked_add(header).expect(OFLOW); - // Integer ceil http://stackoverflow.com/a/2745086/1162888 + // Integer ceil https://stackoverflow.com/a/2745086/1162888 1 + ((x - 1) / header) } @@ -52,7 +52,11 @@ impl Buf32 { #[inline] pub unsafe fn destroy(self) { - mem::drop(Vec::from_raw_parts(self.ptr, 1, bytes_to_vec_capacity::(self.cap))); + mem::drop(Vec::from_raw_parts( + self.ptr, + 1, + bytes_to_vec_capacity::(self.cap), + )); } #[inline(always)] diff --git a/src/fmt.rs b/src/fmt.rs index d414814..2ff04bb 100644 --- a/src/fmt.rs +++ b/src/fmt.rs @@ -1,6 +1,6 @@ // Licensed under the Apache License, Version 2.0 or the MIT license -// , at your +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// , at your // option. This file may not be copied, modified, or distributed // except according to those terms. @@ -19,21 +19,18 @@ //! the format sneaks in. For that reason, these traits require //! `unsafe impl`. -use std::{char, str, mem}; use std::default::Default; -use std::io::Write; +use std::{char, mem, str}; use futf::{self, Codepoint, Meaning}; -use util::unsafe_slice; - /// Implementation details. /// /// You don't need these unless you are implementing /// a new format. pub mod imp { - use std::{iter, slice, mem}; use std::default::Default; + use std::{iter, mem, slice}; /// Describes how to fix up encodings when concatenating. /// @@ -72,9 +69,9 @@ pub mod imp { #[inline] fn next(&mut self) -> Option<(usize, char)> { - self.inner.next().map(|(i, &b)| unsafe { - (i, from_u32_unchecked(b as u32)) - }) + self.inner + .next() + .map(|(i, &b)| unsafe { (i, from_u32_unchecked(b as u32)) }) } } @@ -140,7 +137,8 @@ pub unsafe trait Format { /// The subset format can be converted to the superset format /// for free. pub unsafe trait SubsetOf: Format - where Super: Format, +where + Super: Format, { /// Validate the *other* direction of conversion; check if /// this buffer from the superset format conforms to the @@ -164,7 +162,7 @@ pub unsafe trait SliceFormat: Format + Sized { /// (all of it, or some proper subset). pub unsafe trait CharFormat<'a>: Format { /// Iterator for characters and their byte indices. - type Iter: Iterator; + type Iter: Iterator; /// Iterate over the characters of the string and their byte /// indices. @@ -176,7 +174,8 @@ pub unsafe trait CharFormat<'a>: Format { /// /// Returns `Err(())` iff the character cannot be represented. fn encode_char(ch: char, cont: F) -> Result<(), ()> - where F: FnOnce(&[u8]); + where + F: FnOnce(&[u8]); } /// Indicates a Rust slice type that is represented in memory as bytes. @@ -257,8 +256,8 @@ unsafe impl Format for ASCII { } } -unsafe impl SubsetOf for ASCII { } -unsafe impl SubsetOf for ASCII { } +unsafe impl SubsetOf for ASCII {} +unsafe impl SubsetOf for ASCII {} unsafe impl<'a> CharFormat<'a> for ASCII { type Iter = imp::SingleByteCharIndices<'a>; @@ -270,10 +269,13 @@ unsafe impl<'a> CharFormat<'a> for ASCII { #[inline] fn encode_char(ch: char, cont: F) -> Result<(), ()> - where F: FnOnce(&[u8]) + where + F: FnOnce(&[u8]), { let n = ch as u32; - if n > 0x7F { return Err(()); } + if n > 0x7F { + return Err(()); + } cont(&[n as u8]); Ok(()) } @@ -295,7 +297,10 @@ unsafe impl Format for UTF8 { return true; } match futf::classify(buf, buf.len() - 1) { - Some(Codepoint { meaning: Meaning::Whole(_), .. }) => true, + Some(Codepoint { + meaning: Meaning::Whole(_), + .. + }) => true, _ => false, } } @@ -306,19 +311,21 @@ unsafe impl Format for UTF8 { return true; } match futf::classify(buf, 0) { - Some(Codepoint { meaning: Meaning::Whole(_), .. }) => true, + Some(Codepoint { + meaning: Meaning::Whole(_), + .. + }) => true, _ => false, } } #[inline] fn validate_subseq(buf: &[u8]) -> bool { - ::validate_prefix(buf) - && ::validate_suffix(buf) + ::validate_prefix(buf) && ::validate_suffix(buf) } } -unsafe impl SubsetOf for UTF8 { } +unsafe impl SubsetOf for UTF8 {} unsafe impl SliceFormat for UTF8 { type Slice = str; @@ -351,33 +358,24 @@ unsafe impl<'a> CharFormat<'a> for UTF8 { #[inline] fn encode_char(ch: char, cont: F) -> Result<(), ()> - where F: FnOnce(&[u8]) + where + F: FnOnce(&[u8]), { - unsafe { - let mut utf_8: [u8; 4] = mem::uninitialized(); - let bytes_written = { - let mut buffer = &mut utf_8[..]; - write!(buffer, "{}", ch).ok().expect("Tendril: internal error"); - debug_assert!(buffer.len() <= 4); - 4 - buffer.len() - }; - cont(unsafe_slice(&utf_8, 0, bytes_written)); - Ok(()) - } + cont(ch.encode_utf8(&mut [0_u8; 4]).as_bytes()); + Ok(()) } } /// Marker type for WTF-8 text. /// -/// See the [WTF-8 spec](http://simonsapin.github.io/wtf-8/). +/// See the [WTF-8 spec](https://simonsapin.github.io/wtf-8/). #[derive(Copy, Clone, Default, Debug)] pub struct WTF8; #[inline] fn wtf8_meaningful(m: Meaning) -> bool { match m { - Meaning::Whole(_) | Meaning::LeadSurrogate(_) - | Meaning::TrailSurrogate(_) => true, + Meaning::Whole(_) | Meaning::LeadSurrogate(_) | Meaning::TrailSurrogate(_) => true, _ => false, } } @@ -427,8 +425,7 @@ unsafe impl Format for WTF8 { #[inline] fn validate_subseq(buf: &[u8]) -> bool { - ::validate_prefix(buf) - && ::validate_suffix(buf) + ::validate_prefix(buf) && ::validate_suffix(buf) } #[inline] @@ -436,25 +433,28 @@ unsafe impl Format for WTF8 { const ERR: &'static str = "WTF8: internal error"; if lhs.len() >= 3 && rhs.len() >= 3 { - if let (Some(Codepoint { meaning: Meaning::LeadSurrogate(hi), .. }), - Some(Codepoint { meaning: Meaning::TrailSurrogate(lo), .. })) - = (futf::classify(lhs, lhs.len() - 1), futf::classify(rhs, 0)) + if let ( + Some(Codepoint { + meaning: Meaning::LeadSurrogate(hi), + .. + }), + Some(Codepoint { + meaning: Meaning::TrailSurrogate(lo), + .. + }), + ) = (futf::classify(lhs, lhs.len() - 1), futf::classify(rhs, 0)) { let mut fixup = imp::Fixup { drop_left: 3, drop_right: 3, insert_len: 0, - insert_bytes: mem::uninitialized(), + insert_bytes: [0_u8; 4], }; let n = 0x10000 + ((hi as u32) << 10) + (lo as u32); - fixup.insert_len = { - let mut buffer = &mut fixup.insert_bytes[..]; - write!(buffer, "{}", char::from_u32(n).expect(ERR)).ok().expect(ERR); - debug_assert!(buffer.len() <= 4); - 4 - buffer.len() as u32 - }; + let ch = char::from_u32(n).expect(ERR); + fixup.insert_len = ch.encode_utf8(&mut fixup.insert_bytes).len() as u32; return fixup; } @@ -506,10 +506,13 @@ unsafe impl<'a> CharFormat<'a> for Latin1 { #[inline] fn encode_char(ch: char, cont: F) -> Result<(), ()> - where F: FnOnce(&[u8]) + where + F: FnOnce(&[u8]), { let n = ch as u32; - if n > 0xFF { return Err(()); } + if n > 0xFF { + return Err(()); + } cont(&[n as u8]); Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index a078716..33782fd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,31 +1,35 @@ // Licensed under the Apache License, Version 2.0 or the MIT license -// , at your +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// , at your // option. This file may not be copied, modified, or distributed // except according to those terms. #![cfg_attr(all(test, feature = "bench"), feature(test))] -#![cfg_attr(test, deny(warnings))] +//#![cfg_attr(test, deny(warnings))] -#[cfg(all(test, feature = "bench"))] extern crate test; -#[cfg(feature = "encoding")] pub extern crate encoding; -#[cfg(feature = "encoding_rs")] pub extern crate encoding_rs; -#[macro_use] extern crate mac; +#[cfg(feature = "encoding")] +pub extern crate encoding; +#[cfg(feature = "encoding_rs")] +pub extern crate encoding_rs; +#[cfg(all(test, feature = "bench"))] +extern crate test; +#[macro_use] +extern crate mac; extern crate futf; extern crate utf8; -pub use tendril::{Tendril, ByteTendril, StrTendril, SliceExt, ReadExt, SubtendrilError}; -pub use tendril::{SendTendril, Atomicity, Atomic, NonAtomic}; pub use fmt::Format; pub use stream::TendrilSink; +pub use tendril::{Atomic, Atomicity, NonAtomic, SendTendril}; +pub use tendril::{ByteTendril, ReadExt, SliceExt, StrTendril, SubtendrilError, Tendril}; pub use utf8_decode::IncompleteUtf8; pub mod fmt; pub mod stream; -mod util; mod buf32; mod tendril; mod utf8_decode; +mod util; static OFLOW: &'static str = "tendril: overflow in buffer arithmetic"; diff --git a/src/stream.rs b/src/stream.rs index 999c715..469d58c 100644 --- a/src/stream.rs +++ b/src/stream.rs @@ -1,13 +1,13 @@ // Licensed under the Apache License, Version 2.0 or the MIT license -// , at your +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Streams of tendrils. -use tendril::{Tendril, Atomicity, NonAtomic}; use fmt; +use tendril::{Atomicity, NonAtomic, Tendril}; use std::borrow::Cow; use std::fs::File; @@ -15,8 +15,10 @@ use std::io; use std::marker::PhantomData; use std::path::Path; -#[cfg(feature = "encoding")] use encoding; -#[cfg(feature = "encoding_rs")] use encoding_rs::{self, DecoderResult}; +#[cfg(feature = "encoding")] +use encoding; +#[cfg(feature = "encoding_rs")] +use encoding_rs::{self, DecoderResult}; use utf8; /// Trait for types that can process a tendril. @@ -27,9 +29,10 @@ use utf8; /// architecture. /// /// [html5ever]: https://github.com/servo/html5ever -pub trait TendrilSink - where F: fmt::Format, - A: Atomicity, +pub trait TendrilSink +where + F: fmt::Format, + A: Atomicity, { /// Process this tendril. fn process(&mut self, t: Tendril); @@ -44,14 +47,22 @@ pub trait TendrilSink fn finish(self) -> Self::Output; /// Process one tendril and finish. - fn one(mut self, t: T) -> Self::Output where Self: Sized, T: Into> { + fn one(mut self, t: T) -> Self::Output + where + Self: Sized, + T: Into>, + { self.process(t.into()); self.finish() } /// Consume an iterator of tendrils, processing each item, then finish. fn from_iter(mut self, i: I) -> Self::Output - where Self: Sized, I: IntoIterator, I::Item: Into> { + where + Self: Sized, + I: IntoIterator, + I::Item: Into>, + { for t in i { self.process(t.into()) } @@ -61,7 +72,11 @@ pub trait TendrilSink /// Read from the given stream of bytes until exhaustion and process incrementally, /// then finish. Return `Err` at the first I/O error. fn read_from(mut self, r: &mut R) -> io::Result - where Self: Sized, R: io::Read, F: fmt::SliceFormat { + where + Self: Sized, + R: io::Read, + F: fmt::SliceFormat, + { const BUFFER_SIZE: u32 = 4 * 1024; loop { let mut tendril = Tendril::::new(); @@ -78,21 +93,24 @@ pub trait TendrilSink Ok(n) => { tendril.pop_back(BUFFER_SIZE - n as u32); self.process(tendril); - break + break; } Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(e) + Err(e) => return Err(e), } } } } - /// Read from the file at the given path and process incrementally, /// then finish. Return `Err` at the first I/O error. fn from_file

(self, path: P) -> io::Result - where Self: Sized, P: AsRef, F: fmt::SliceFormat { - self.read_from(&mut try!(File::open(path))) + where + Self: Sized, + P: AsRef, + F: fmt::SliceFormat, + { + self.read_from(&mut File::open(path)?) } } @@ -102,9 +120,10 @@ pub trait TendrilSink /// /// This does not allocate memory: the output is either subtendrils on the input, /// on inline tendrils for a single code point. -pub struct Utf8LossyDecoder - where Sink: TendrilSink, - A: Atomicity +pub struct Utf8LossyDecoder +where + Sink: TendrilSink, + A: Atomicity, { pub inner_sink: Sink, incomplete: Option, @@ -112,8 +131,9 @@ pub struct Utf8LossyDecoder } impl Utf8LossyDecoder - where Sink: TendrilSink, - A: Atomicity, +where + Sink: TendrilSink, + A: Atomicity, { /// Create a new incremental UTF-8 decoder. #[inline] @@ -127,8 +147,9 @@ impl Utf8LossyDecoder } impl TendrilSink for Utf8LossyDecoder - where Sink: TendrilSink, - A: Atomicity, +where + Sink: TendrilSink, + A: Atomicity, { #[inline] fn process(&mut self, mut t: Tendril) { @@ -136,12 +157,11 @@ impl TendrilSink for Utf8LossyDecoder if let Some(mut incomplete) = self.incomplete.take() { let resume_at = incomplete.try_complete(&t).map(|(result, rest)| { match result { - Ok(s) => { - self.inner_sink.process(Tendril::from_slice(s)) - } + Ok(s) => self.inner_sink.process(Tendril::from_slice(s)), Err(_) => { self.inner_sink.error("invalid byte sequence".into()); - self.inner_sink.process(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); + self.inner_sink + .process(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); } } t.len() - rest.len() @@ -149,11 +169,9 @@ impl TendrilSink for Utf8LossyDecoder match resume_at { None => { self.incomplete = Some(incomplete); - return - } - Some(resume_at) => { - t.pop_front(resume_at as u32) + return; } + Some(resume_at) => t.pop_front(resume_at as u32), } } while !t.is_empty() { @@ -163,12 +181,22 @@ impl TendrilSink for Utf8LossyDecoder debug_assert!(s.len() == t.len()); Ok(()) } - Err(utf8::DecodeError::Invalid { valid_prefix, invalid_sequence, .. }) => { + Err(utf8::DecodeError::Invalid { + valid_prefix, + invalid_sequence, + .. + }) => { debug_assert!(valid_prefix.as_ptr() == t.as_ptr()); debug_assert!(valid_prefix.len() <= t.len()); - Err((valid_prefix.len(), Err(valid_prefix.len() + invalid_sequence.len()))) + Err(( + valid_prefix.len(), + Err(valid_prefix.len() + invalid_sequence.len()), + )) } - Err(utf8::DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => { + Err(utf8::DecodeError::Incomplete { + valid_prefix, + incomplete_suffix, + }) => { debug_assert!(valid_prefix.as_ptr() == t.as_ptr()); debug_assert!(valid_prefix.len() <= t.len()); Err((valid_prefix.len(), Ok(incomplete_suffix))) @@ -176,26 +204,26 @@ impl TendrilSink for Utf8LossyDecoder }; match unborrowed_result { Ok(()) => { - unsafe { - self.inner_sink.process(t.reinterpret_without_validating()) - } - return + unsafe { self.inner_sink.process(t.reinterpret_without_validating()) } + return; } Err((valid_len, and_then)) => { if valid_len > 0 { let subtendril = t.subtendril(0, valid_len as u32); unsafe { - self.inner_sink.process(subtendril.reinterpret_without_validating()) + self.inner_sink + .process(subtendril.reinterpret_without_validating()) } } match and_then { Ok(incomplete) => { self.incomplete = Some(incomplete); - return + return; } Err(offset) => { self.inner_sink.error("invalid byte sequence".into()); - self.inner_sink.process(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); + self.inner_sink + .process(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); t.pop_front(offset as u32); } } @@ -214,8 +242,10 @@ impl TendrilSink for Utf8LossyDecoder #[inline] fn finish(mut self) -> Sink::Output { if self.incomplete.is_some() { - self.inner_sink.error("incomplete byte sequence at end of stream".into()); - self.inner_sink.process(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); + self.inner_sink + .error("incomplete byte sequence at end of stream".into()); + self.inner_sink + .process(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); } self.inner_sink.finish() } @@ -227,16 +257,20 @@ impl TendrilSink for Utf8LossyDecoder /// /// This allocates new tendrils for encodings other than UTF-8. #[cfg(any(feature = "encoding", feature = "encoding_rs"))] -pub struct LossyDecoder - where Sink: TendrilSink, - A: Atomicity { +pub struct LossyDecoder +where + Sink: TendrilSink, + A: Atomicity, +{ inner: LossyDecoderInner, } #[cfg(any(feature = "encoding", feature = "encoding_rs"))] enum LossyDecoderInner - where Sink: TendrilSink, - A: Atomicity { +where + Sink: TendrilSink, + A: Atomicity, +{ Utf8(Utf8LossyDecoder), #[cfg(feature = "encoding")] Encoding(Box, Sink), @@ -246,8 +280,9 @@ enum LossyDecoderInner #[cfg(any(feature = "encoding", feature = "encoding_rs"))] impl LossyDecoder - where Sink: TendrilSink, - A: Atomicity, +where + Sink: TendrilSink, + A: Atomicity, { /// Create a new incremental decoder using the encoding crate. #[cfg(feature = "encoding")] @@ -257,7 +292,7 @@ impl LossyDecoder LossyDecoder::utf8(sink) } else { LossyDecoder { - inner: LossyDecoderInner::Encoding(encoding.raw_decoder(), sink) + inner: LossyDecoderInner::Encoding(encoding.raw_decoder(), sink), } } } @@ -270,7 +305,7 @@ impl LossyDecoder return Self::utf8(sink); } Self { - inner: LossyDecoderInner::EncodingRs(encoding.new_decoder(), sink) + inner: LossyDecoderInner::EncodingRs(encoding.new_decoder(), sink), } } @@ -281,7 +316,7 @@ impl LossyDecoder #[inline] pub fn utf8(sink: Sink) -> LossyDecoder { LossyDecoder { - inner: LossyDecoderInner::Utf8(Utf8LossyDecoder::new(sink)) + inner: LossyDecoderInner::Utf8(Utf8LossyDecoder::new(sink)), } } @@ -310,8 +345,9 @@ impl LossyDecoder #[cfg(any(feature = "encoding", feature = "encoding_rs"))] impl TendrilSink for LossyDecoder - where Sink: TendrilSink, - A: Atomicity, +where + Sink: TendrilSink, + A: Atomicity, { #[inline] fn process(&mut self, t: Tendril) { @@ -336,14 +372,14 @@ impl TendrilSink for LossyDecoder if out.len() > 0 { sink.process(out); } - }, + } #[cfg(feature = "encoding_rs")] LossyDecoderInner::EncodingRs(ref mut decoder, ref mut sink) => { if t.is_empty() { return; } decode_to_sink(t, decoder, sink, false); - }, + } } } @@ -375,7 +411,7 @@ impl TendrilSink for LossyDecoder sink.process(out); } sink.finish() - }, + } #[cfg(feature = "encoding_rs")] LossyDecoderInner::EncodingRs(mut decoder, mut sink) => { decode_to_sink(Tendril::new(), &mut decoder, &mut sink, true); @@ -391,8 +427,7 @@ fn decode_to_sink( decoder: &mut encoding_rs::Decoder, sink: &mut Sink, last: bool, -) -where +) where Sink: TendrilSink, A: Atomicity, { @@ -408,18 +443,17 @@ where decoder.decode_to_utf8_without_replacement(&t, &mut out, last); if bytes_written > 0 { sink.process(unsafe { - out - .subtendril(0, bytes_written as u32) + out.subtendril(0, bytes_written as u32) .reinterpret_without_validating() }); } match result { DecoderResult::InputEmpty => return, - DecoderResult::OutputFull => {}, + DecoderResult::OutputFull => {} DecoderResult::Malformed(_, _) => { sink.error(Cow::Borrowed("invalid sequence")); sink.process("\u{FFFD}".into()); - }, + } } t.pop_front(bytes_read as u32); if t.is_empty() { @@ -431,27 +465,31 @@ where #[cfg(test)] mod test { use super::{TendrilSink, Utf8LossyDecoder}; - use tendril::{Tendril, Atomicity, NonAtomic}; use fmt; use std::borrow::Cow; + use tendril::{Atomicity, NonAtomic, Tendril}; #[cfg(any(feature = "encoding", feature = "encoding_rs"))] use super::LossyDecoder; #[cfg(any(feature = "encoding", feature = "encoding_rs"))] use tendril::SliceExt; - #[cfg(feature = "encoding")] use encoding::all as enc; - #[cfg(feature = "encoding_rs")] use encoding_rs as enc_rs; + #[cfg(feature = "encoding")] + use encoding::all as enc; + #[cfg(feature = "encoding_rs")] + use encoding_rs as enc_rs; struct Accumulate - where A: Atomicity, + where + A: Atomicity, { tendrils: Vec>, errors: Vec, } impl Accumulate - where A: Atomicity, + where + A: Atomicity, { fn new() -> Accumulate { Accumulate { @@ -462,7 +500,8 @@ mod test { } impl TendrilSink for Accumulate - where A: Atomicity, + where + A: Atomicity, { fn process(&mut self, t: Tendril) { self.tendrils.push(t); @@ -482,7 +521,10 @@ mod test { fn check_utf8(input: &[&[u8]], expected: &[&str], errs: usize) { let decoder = Utf8LossyDecoder::new(Accumulate::::new()); let (tendrils, errors) = decoder.from_iter(input.iter().cloned()); - assert_eq!(expected, &*tendrils.iter().map(|t| &**t).collect::>()); + assert_eq!( + expected, + &*tendrils.iter().map(|t| &**t).collect::>() + ); assert_eq!(errs, errors.len()); } @@ -496,21 +538,45 @@ mod test { check_utf8(&[b"xy\xEA\x99\xAEzw"], &["xy\u{a66e}zw"], 0); check_utf8(&[b"xy\xEA", b"\x99\xAEzw"], &["xy", "\u{a66e}z", "w"], 0); check_utf8(&[b"xy\xEA\x99", b"\xAEzw"], &["xy", "\u{a66e}z", "w"], 0); - check_utf8(&[b"xy\xEA", b"\x99", b"\xAEzw"], &["xy", "\u{a66e}z", "w"], 0); + check_utf8( + &[b"xy\xEA", b"\x99", b"\xAEzw"], + &["xy", "\u{a66e}z", "w"], + 0, + ); check_utf8(&[b"\xEA", b"", b"\x99", b"", b"\xAE"], &["\u{a66e}"], 0); - check_utf8(&[b"", b"\xEA", b"", b"\x99", b"", b"\xAE", b""], &["\u{a66e}"], 0); - - check_utf8(&[b"xy\xEA", b"\xFF", b"\x99\xAEz"], - &["xy", "\u{fffd}", "\u{fffd}", "\u{fffd}", "\u{fffd}", "z"], 4); - check_utf8(&[b"xy\xEA\x99", b"\xFFz"], - &["xy", "\u{fffd}", "\u{fffd}", "z"], 2); + check_utf8( + &[b"", b"\xEA", b"", b"\x99", b"", b"\xAE", b""], + &["\u{a66e}"], + 0, + ); + + check_utf8( + &[b"xy\xEA", b"\xFF", b"\x99\xAEz"], + &["xy", "\u{fffd}", "\u{fffd}", "\u{fffd}", "\u{fffd}", "z"], + 4, + ); + check_utf8( + &[b"xy\xEA\x99", b"\xFFz"], + &["xy", "\u{fffd}", "\u{fffd}", "z"], + 2, + ); check_utf8(&[b"\xC5\x91\xC5\x91\xC5\x91"], &["őőő"], 0); - check_utf8(&[b"\xC5\x91", b"\xC5\x91", b"\xC5\x91"], &["ő", "ő", "ő"], 0); - check_utf8(&[b"\xC5", b"\x91\xC5", b"\x91\xC5", b"\x91"], - &["ő", "ő", "ő"], 0); - check_utf8(&[b"\xC5", b"\x91\xff", b"\x91\xC5", b"\x91"], - &["ő", "\u{fffd}", "\u{fffd}", "ő"], 2); + check_utf8( + &[b"\xC5\x91", b"\xC5\x91", b"\xC5\x91"], + &["ő", "ő", "ő"], + 0, + ); + check_utf8( + &[b"\xC5", b"\x91\xC5", b"\x91\xC5", b"\x91"], + &["ő", "ő", "ő"], + 0, + ); + check_utf8( + &[b"\xC5", b"\x91\xff", b"\x91\xC5", b"\x91"], + &["ő", "\u{fffd}", "\u{fffd}", "ő"], + 2, + ); // incomplete char at end of input check_utf8(&[b"\xC0"], &["\u{fffd}"], 1); @@ -546,7 +612,6 @@ mod test { (&[b"xyz"], "xyz", 0), (&[b"xy", b"", b"", b"z"], "xyz", 0), (&[b"x", b"y", b"z"], "xyz", 0), - (&[b"\xFF"], "\u{fffd}", 1), (&[b"x\xC0yz"], "x\u{fffd}yz", 1), (&[b"x", b"\xC0y", b"z"], "x\u{fffd}yz", 1), @@ -568,18 +633,23 @@ mod test { (&[b""], "", 0), (&[b"xyz"], "xyz", 0), (&[b"x", b"y", b"z"], "xyz", 0), - (&[b"\xEA\x99\xAE"], "\u{a66e}", 0), (&[b"\xEA", b"\x99\xAE"], "\u{a66e}", 0), (&[b"\xEA\x99", b"\xAE"], "\u{a66e}", 0), (&[b"\xEA", b"\x99", b"\xAE"], "\u{a66e}", 0), (&[b"\xEA", b"", b"\x99", b"", b"\xAE"], "\u{a66e}", 0), - (&[b"", b"\xEA", b"", b"\x99", b"", b"\xAE", b""], "\u{a66e}", 0), - + ( + &[b"", b"\xEA", b"", b"\x99", b"", b"\xAE", b""], + "\u{a66e}", + 0, + ), (&[b"xy\xEA", b"\x99\xAEz"], "xy\u{a66e}z", 0), - (&[b"xy\xEA", b"\xFF", b"\x99\xAEz"], "xy\u{fffd}\u{fffd}\u{fffd}\u{fffd}z", 4), + ( + &[b"xy\xEA", b"\xFF", b"\x99\xAEz"], + "xy\u{fffd}\u{fffd}\u{fffd}\u{fffd}z", + 4, + ), (&[b"xy\xEA\x99", b"\xFFz"], "xy\u{fffd}\u{fffd}z", 2), - // incomplete char at end of input (&[b"\xC0"], "\u{fffd}", 1), (&[b"\xEA\x99"], "\u{fffd}", 1), @@ -608,7 +678,11 @@ mod test { (&[b"\xfc\xce\xc5\xd2\xc7\xc9\xd1"], "Энергия", 0), (&[b"\xfc\xce", b"\xc5\xd2\xc7\xc9\xd1"], "Энергия", 0), (&[b"\xfc\xce", b"\xc5\xd2\xc7", b"\xc9\xd1"], "Энергия", 0), - (&[b"\xfc\xce", b"", b"\xc5\xd2\xc7", b"\xc9\xd1", b""], "Энергия", 0), + ( + &[b"\xfc\xce", b"", b"\xc5\xd2\xc7", b"\xc9\xd1", b""], + "Энергия", + 0, + ), ]; #[cfg(feature = "encoding")] @@ -636,9 +710,12 @@ mod test { (&[b"\xbe\xc8\xb3\xe7"], "안녕", 0), (&[b"\xbe", b"\xc8\xb3\xe7"], "안녕", 0), (&[b"\xbe", b"", b"\xc8\xb3\xe7"], "안녕", 0), - (&[b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4"], "안녕하세요", 0), + ( + &[b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4"], + "안녕하세요", + 0, + ), (&[b"\xbe\xc8\xb3\xe7\xc7"], "안녕\u{fffd}", 1), - (&[b"\xbe", b"", b"\xc8\xb3"], "안\u{fffd}", 1), (&[b"\xbe\x28\xb3\xe7"], "\u{fffd}(녕", 1), ]; @@ -666,8 +743,10 @@ mod test { let decoder = Utf8LossyDecoder::new(Accumulate::::new()); let mut bytes: &[u8] = b"foo\xffbar"; let (tendrils, errors) = decoder.read_from(&mut bytes).unwrap(); - assert_eq!(&*tendrils.iter().map(|t| &**t).collect::>(), - &["foo", "\u{FFFD}", "bar"]); + assert_eq!( + &*tendrils.iter().map(|t| &**t).collect::>(), + &["foo", "\u{FFFD}", "bar"] + ); assert_eq!(errors, &["invalid byte sequence"]); } } diff --git a/src/tendril.rs b/src/tendril.rs index e48d648..0941b26 100644 --- a/src/tendril.rs +++ b/src/tendril.rs @@ -1,30 +1,29 @@ // Licensed under the Apache License, Version 2.0 or the MIT license -// , at your +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -use std::{ptr, mem, hash, str, u32, io, slice}; -use std::sync::atomic::{self, AtomicUsize}; -use std::sync::atomic::Ordering as AtomicOrdering; use std::borrow::Borrow; -use std::marker::PhantomData; -use std::cell::Cell; -use std::ops::{Deref, DerefMut}; -use std::iter::FromIterator; -use std::io::Write; -use std::default::Default; +use std::cell::{Cell, UnsafeCell}; use std::cmp::Ordering; +use std::default::Default; use std::fmt as strfmt; +use std::iter::FromIterator; +use std::marker::PhantomData; +use std::num::NonZeroUsize; +use std::ops::{Deref, DerefMut}; +use std::sync::atomic::Ordering as AtomicOrdering; +use std::sync::atomic::{self, AtomicUsize}; +use std::{hash, io, mem, ptr, str, u32}; -#[cfg(feature = "encoding")] use encoding::{self, EncodingRef, DecoderTrap, EncoderTrap}; - +#[cfg(feature = "encoding")] +use encoding::{self, DecoderTrap, EncoderTrap, EncodingRef}; use buf32::{self, Buf32}; -use fmt::{self, Slice}; use fmt::imp::Fixup; -use util::{unsafe_slice, unsafe_slice_mut, copy_and_advance, copy_lifetime_mut, copy_lifetime, - NonZeroUsize}; +use fmt::{self, Slice}; +use util::{copy_and_advance, copy_lifetime, copy_lifetime_mut, unsafe_slice, unsafe_slice_mut}; use OFLOW; const MAX_INLINE_LEN: usize = 8; @@ -34,13 +33,7 @@ const EMPTY_TAG: usize = 0xF; #[inline(always)] fn inline_tag(len: u32) -> NonZeroUsize { debug_assert!(len <= MAX_INLINE_LEN as u32); - unsafe { - NonZeroUsize::new(if len == 0 { - EMPTY_TAG - } else { - len as usize - }) - } + unsafe { NonZeroUsize::new_unchecked(if len == 0 { EMPTY_TAG } else { len as usize }) } } /// The multithreadedness of a tendril. @@ -75,6 +68,7 @@ pub unsafe trait Atomicity: 'static { /// and so doesn't typically need to be written. /// /// This is akin to using `Rc` for reference counting. +#[repr(C)] pub struct NonAtomic(Cell); unsafe impl Atomicity for NonAtomic { @@ -98,8 +92,7 @@ unsafe impl Atomicity for NonAtomic { } #[inline] - fn fence_acquire() { - } + fn fence_acquire() {} } /// A marker of an atomic (and hence concurrent) tendril. @@ -133,20 +126,21 @@ unsafe impl Atomicity for Atomic { } } -#[repr(packed)] +#[repr(C)] // Preserve field order for cross-atomicity transmutes struct Header { refcount: A, cap: u32, } impl Header - where A: Atomicity, +where + A: Atomicity, { #[inline(always)] unsafe fn new() -> Header { Header { refcount: A::new(), - cap: mem::uninitialized(), + cap: 0, } } } @@ -188,17 +182,35 @@ pub enum SubtendrilError { /// you attempt to go over the limit. #[repr(C)] pub struct Tendril - where F: fmt::Format, - A: Atomicity, +where + F: fmt::Format, + A: Atomicity, { ptr: Cell, - len: u32, - aux: Cell, + buf: UnsafeCell, marker: PhantomData<*mut F>, refcount_marker: PhantomData, } -unsafe impl Send for Tendril where F: fmt::Format, A: Atomicity + Sync { } +#[repr(C)] +union Buffer { + heap: Heap, + inline: [u8; 8], +} + +#[derive(Copy, Clone)] +#[repr(C)] +struct Heap { + len: u32, + aux: u32, +} + +unsafe impl Send for Tendril +where + F: fmt::Format, + A: Atomicity + Sync, +{ +} /// `Tendril` for storing native Rust strings. pub type StrTendril = Tendril; @@ -207,8 +219,9 @@ pub type StrTendril = Tendril; pub type ByteTendril = Tendril; impl Clone for Tendril - where F: fmt::Format, - A: Atomicity, +where + F: fmt::Format, + A: Atomicity, { #[inline] fn clone(&self) -> Tendril { @@ -224,8 +237,9 @@ impl Clone for Tendril } impl Drop for Tendril - where F: fmt::Format, - A: Atomicity, +where + F: fmt::Format, + A: Atomicity, { #[inline] fn drop(&mut self) { @@ -253,21 +267,24 @@ macro_rules! from_iter_method { ($ty:ty) => { #[inline] fn from_iter(iterable: I) -> Self - where I: IntoIterator + where + I: IntoIterator, { let mut output = Self::new(); output.extend(iterable); output } - } + }; } impl Extend for Tendril - where A: Atomicity, +where + A: Atomicity, { #[inline] fn extend(&mut self, iterable: I) - where I: IntoIterator, + where + I: IntoIterator, { let iterator = iterable.into_iter(); self.force_reserve(iterator.size_hint().0 as u32); @@ -278,17 +295,20 @@ impl Extend for Tendril } impl FromIterator for Tendril - where A: Atomicity, +where + A: Atomicity, { from_iter_method!(char); } impl Extend for Tendril - where A: Atomicity, +where + A: Atomicity, { #[inline] fn extend(&mut self, iterable: I) - where I: IntoIterator, + where + I: IntoIterator, { let iterator = iterable.into_iter(); self.force_reserve(iterator.size_hint().0 as u32); @@ -299,17 +319,20 @@ impl Extend for Tendril } impl FromIterator for Tendril - where A: Atomicity, +where + A: Atomicity, { from_iter_method!(u8); } impl<'a, A> Extend<&'a u8> for Tendril - where A: Atomicity, +where + A: Atomicity, { #[inline] fn extend(&mut self, iterable: I) - where I: IntoIterator, + where + I: IntoIterator, { let iterator = iterable.into_iter(); self.force_reserve(iterator.size_hint().0 as u32); @@ -320,17 +343,20 @@ impl<'a, A> Extend<&'a u8> for Tendril } impl<'a, A> FromIterator<&'a u8> for Tendril - where A: Atomicity, +where + A: Atomicity, { from_iter_method!(&'a u8); } impl<'a, A> Extend<&'a str> for Tendril - where A: Atomicity, +where + A: Atomicity, { #[inline] fn extend(&mut self, iterable: I) - where I: IntoIterator, + where + I: IntoIterator, { for s in iterable { self.push_slice(s); @@ -339,17 +365,20 @@ impl<'a, A> Extend<&'a str> for Tendril } impl<'a, A> FromIterator<&'a str> for Tendril - where A: Atomicity, +where + A: Atomicity, { from_iter_method!(&'a str); } impl<'a, A> Extend<&'a [u8]> for Tendril - where A: Atomicity, +where + A: Atomicity, { #[inline] fn extend(&mut self, iterable: I) - where I: IntoIterator, + where + I: IntoIterator, { for s in iterable { self.push_slice(s); @@ -358,18 +387,21 @@ impl<'a, A> Extend<&'a [u8]> for Tendril } impl<'a, A> FromIterator<&'a [u8]> for Tendril - where A: Atomicity, +where + A: Atomicity, { from_iter_method!(&'a [u8]); } impl<'a, F, A> Extend<&'a Tendril> for Tendril - where F: fmt::Format + 'a, - A: Atomicity, +where + F: fmt::Format + 'a, + A: Atomicity, { #[inline] fn extend(&mut self, iterable: I) - where I: IntoIterator>, + where + I: IntoIterator>, { for t in iterable { self.push_tendril(t); @@ -378,41 +410,41 @@ impl<'a, F, A> Extend<&'a Tendril> for Tendril } impl<'a, F, A> FromIterator<&'a Tendril> for Tendril - where F: fmt::Format + 'a, - A: Atomicity, +where + F: fmt::Format + 'a, + A: Atomicity, { from_iter_method!(&'a Tendril); } impl Deref for Tendril - where F: fmt::SliceFormat, - A: Atomicity, +where + F: fmt::SliceFormat, + A: Atomicity, { type Target = F::Slice; #[inline] fn deref(&self) -> &F::Slice { - unsafe { - F::Slice::from_bytes(self.as_byte_slice()) - } + unsafe { F::Slice::from_bytes(self.as_byte_slice()) } } } impl DerefMut for Tendril - where F: fmt::SliceFormat, - A: Atomicity, +where + F: fmt::SliceFormat, + A: Atomicity, { #[inline] fn deref_mut(&mut self) -> &mut F::Slice { - unsafe { - F::Slice::from_mut_bytes(self.as_mut_byte_slice()) - } + unsafe { F::Slice::from_mut_bytes(self.as_mut_byte_slice()) } } } impl Borrow<[u8]> for Tendril - where F: fmt::SliceFormat, - A: Atomicity, +where + F: fmt::SliceFormat, + A: Atomicity, { fn borrow(&self) -> &[u8] { self.as_byte_slice() @@ -424,8 +456,9 @@ impl Borrow<[u8]> for Tendril // https://github.com/rust-lang/rust/issues/27108 impl PartialEq for Tendril - where F: fmt::Format, - A: Atomicity, +where + F: fmt::Format, + A: Atomicity, { #[inline] fn eq(&self, other: &Self) -> bool { @@ -439,14 +472,17 @@ impl PartialEq for Tendril } impl Eq for Tendril - where F: fmt::Format, - A: Atomicity, -{ } +where + F: fmt::Format, + A: Atomicity, +{ +} impl PartialOrd for Tendril - where F: fmt::SliceFormat, - ::Slice: PartialOrd, - A: Atomicity, +where + F: fmt::SliceFormat, + ::Slice: PartialOrd, + A: Atomicity, { #[inline] fn partial_cmp(&self, other: &Self) -> Option { @@ -455,9 +491,10 @@ impl PartialOrd for Tendril } impl Ord for Tendril - where F: fmt::SliceFormat, - ::Slice: Ord, - A: Atomicity, +where + F: fmt::SliceFormat, + ::Slice: Ord, + A: Atomicity, { #[inline] fn cmp(&self, other: &Self) -> Ordering { @@ -466,8 +503,9 @@ impl Ord for Tendril } impl Default for Tendril - where F: fmt::Format, - A: Atomicity, +where + F: fmt::Format, + A: Atomicity, { #[inline(always)] fn default() -> Tendril { @@ -476,9 +514,10 @@ impl Default for Tendril } impl strfmt::Debug for Tendril - where F: fmt::SliceFormat + Default + strfmt::Debug, - ::Slice: strfmt::Debug, - A: Atomicity, +where + F: fmt::SliceFormat + Default + strfmt::Debug, + ::Slice: strfmt::Debug, + A: Atomicity, { #[inline] fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result { @@ -488,15 +527,16 @@ impl strfmt::Debug for Tendril _ => "owned", }; - try!(write!(f, "Tendril<{:?}>({}: ", ::default(), kind)); - try!(<::Slice as strfmt::Debug>::fmt(&**self, f)); + write!(f, "Tendril<{:?}>({}: ", ::default(), kind)?; + <::Slice as strfmt::Debug>::fmt(&**self, f)?; write!(f, ")") } } impl hash::Hash for Tendril - where F: fmt::Format, - A: Atomicity, +where + F: fmt::Format, + A: Atomicity, { #[inline] fn hash(&self, hasher: &mut H) { @@ -505,15 +545,14 @@ impl hash::Hash for Tendril } impl Tendril - where F: fmt::Format, - A: Atomicity, +where + F: fmt::Format, + A: Atomicity, { /// Create a new, empty `Tendril` in any format. #[inline(always)] pub fn new() -> Tendril { - unsafe { - Tendril::inline(&[]) - } + unsafe { Tendril::inline(&[]) } } /// Create a new, empty `Tendril` with a specified capacity. @@ -561,7 +600,7 @@ impl Tendril match self.ptr.get().get() { EMPTY_TAG => 0, n if n <= MAX_INLINE_LEN => n as u32, - _ => self.len, + _ => unsafe { self.raw_len() }, } } @@ -585,14 +624,15 @@ impl Tendril #[inline] pub fn clear(&mut self) { if self.ptr.get().get() <= MAX_INLINE_TAG { - self.ptr.set(unsafe { NonZeroUsize::new(EMPTY_TAG) }); + self.ptr + .set(unsafe { NonZeroUsize::new_unchecked(EMPTY_TAG) }); } else { let (_, shared, _) = unsafe { self.assume_buf() }; if shared { // No need to keep a reference alive for a 0-size slice. *self = Tendril::new(); } else { - self.len = 0; + unsafe { self.set_len(0) }; } } } @@ -635,8 +675,9 @@ impl Tendril /// View as a superset format, for free. #[inline(always)] pub fn as_superset(&self) -> &Tendril - where F: fmt::SubsetOf, - Super: fmt::Format, + where + F: fmt::SubsetOf, + Super: fmt::Format, { unsafe { mem::transmute(self) } } @@ -644,8 +685,9 @@ impl Tendril /// Convert into a superset format, for free. #[inline(always)] pub fn into_superset(self) -> Tendril - where F: fmt::SubsetOf, - Super: fmt::Format, + where + F: fmt::SubsetOf, + Super: fmt::Format, { unsafe { mem::transmute(self) } } @@ -653,7 +695,8 @@ impl Tendril /// View as a subset format, if the `Tendril` conforms to that subset. #[inline] pub fn try_as_subset(&self) -> Result<&Tendril, ()> - where Sub: fmt::SubsetOf, + where + Sub: fmt::SubsetOf, { match Sub::revalidate_subset(self.as_byte_slice()) { true => Ok(unsafe { mem::transmute(self) }), @@ -664,7 +707,8 @@ impl Tendril /// Convert into a subset format, if the `Tendril` conforms to that subset. #[inline] pub fn try_into_subset(self) -> Result, Self> - where Sub: fmt::SubsetOf, + where + Sub: fmt::SubsetOf, { match Sub::revalidate_subset(self.as_byte_slice()) { true => Ok(unsafe { mem::transmute(self) }), @@ -676,7 +720,8 @@ impl Tendril /// that format. #[inline] pub fn try_reinterpret_view(&self) -> Result<&Tendril, ()> - where Other: fmt::Format, + where + Other: fmt::Format, { match Other::validate(self.as_byte_slice()) { true => Ok(unsafe { mem::transmute(self) }), @@ -692,7 +737,8 @@ impl Tendril /// See the `encode` and `decode` methods for character encoding conversion. #[inline] pub fn try_reinterpret(self) -> Result, Self> - where Other: fmt::Format, + where + Other: fmt::Format, { match Other::validate(self.as_byte_slice()) { true => Ok(unsafe { mem::transmute(self) }), @@ -723,11 +769,12 @@ impl Tendril let (self_buf, self_shared, _) = self.assume_buf(); let (other_buf, other_shared, _) = other.assume_buf(); - if self_shared && other_shared + if self_shared + && other_shared && (self_buf.data_ptr() == other_buf.data_ptr()) - && (other.aux.get() == self.aux.get() + self.len) + && other.aux() == self.aux() + self.raw_len() { - self.len = new_len; + self.set_len(new_len); return; } } @@ -745,17 +792,18 @@ impl Tendril /// `Err` if these are out of bounds, or if the resulting slice /// does not conform to the format. #[inline] - pub fn try_subtendril(&self, offset: u32, length: u32) - -> Result, SubtendrilError> - { + pub fn try_subtendril( + &self, + offset: u32, + length: u32, + ) -> Result, SubtendrilError> { let self_len = self.len32(); if offset > self_len || length > (self_len - offset) { return Err(SubtendrilError::OutOfBounds); } unsafe { - let byte_slice = unsafe_slice(self.as_byte_slice(), - offset as usize, length as usize); + let byte_slice = unsafe_slice(self.as_byte_slice(), offset as usize, length as usize); if !F::validate_subseq(byte_slice) { return Err(SubtendrilError::ValidationFailed); } @@ -788,8 +836,11 @@ impl Tendril let new_len = old_len - n; unsafe { - if !F::validate_suffix(unsafe_slice(self.as_byte_slice(), - n as usize, new_len as usize)) { + if !F::validate_suffix(unsafe_slice( + self.as_byte_slice(), + n as usize, + new_len as usize, + )) { return Err(SubtendrilError::ValidationFailed); } @@ -823,8 +874,7 @@ impl Tendril let new_len = old_len - n; unsafe { - if !F::validate_prefix(unsafe_slice(self.as_byte_slice(), - 0, new_len as usize)) { + if !F::validate_prefix(unsafe_slice(self.as_byte_slice(), 0, new_len as usize)) { return Err(SubtendrilError::ValidationFailed); } @@ -845,7 +895,8 @@ impl Tendril /// View as another format, without validating. #[inline(always)] pub unsafe fn reinterpret_view_without_validating(&self) -> &Tendril - where Other: fmt::Format, + where + Other: fmt::Format, { mem::transmute(self) } @@ -853,7 +904,8 @@ impl Tendril /// Convert into another format, without validating. #[inline(always)] pub unsafe fn reinterpret_without_validating(self) -> Tendril - where Other: fmt::Format, + where + Other: fmt::Format, { mem::transmute(self) } @@ -874,35 +926,52 @@ impl Tendril pub unsafe fn push_bytes_without_validating(&mut self, buf: &[u8]) { assert!(buf.len() <= buf32::MAX_LEN); - let Fixup { drop_left, drop_right, insert_len, insert_bytes } - = F::fixup(self.as_byte_slice(), buf); + let Fixup { + drop_left, + drop_right, + insert_len, + insert_bytes, + } = F::fixup(self.as_byte_slice(), buf); // FIXME: think more about overflow let adj_len = self.len32() + insert_len - drop_left; - let new_len = adj_len.checked_add(buf.len() as u32).expect(OFLOW) - - drop_right; + let new_len = adj_len.checked_add(buf.len() as u32).expect(OFLOW) - drop_right; let drop_left = drop_left as usize; let drop_right = drop_right as usize; if new_len <= MAX_INLINE_LEN as u32 { - let mut tmp: [u8; MAX_INLINE_LEN] = mem::uninitialized(); + let mut tmp = [0_u8; MAX_INLINE_LEN]; { let old = self.as_byte_slice(); let mut dest = tmp.as_mut_ptr(); copy_and_advance(&mut dest, unsafe_slice(old, 0, old.len() - drop_left)); - copy_and_advance(&mut dest, unsafe_slice(&insert_bytes, 0, insert_len as usize)); - copy_and_advance(&mut dest, unsafe_slice(buf, drop_right, buf.len() - drop_right)); + copy_and_advance( + &mut dest, + unsafe_slice(&insert_bytes, 0, insert_len as usize), + ); + copy_and_advance( + &mut dest, + unsafe_slice(buf, drop_right, buf.len() - drop_right), + ); } *self = Tendril::inline(&tmp[..new_len as usize]); } else { self.make_owned_with_capacity(new_len); let (owned, _, _) = self.assume_buf(); - let mut dest = owned.data_ptr().offset((owned.len as usize - drop_left) as isize); - copy_and_advance(&mut dest, unsafe_slice(&insert_bytes, 0, insert_len as usize)); - copy_and_advance(&mut dest, unsafe_slice(buf, drop_right, buf.len() - drop_right)); - self.len = new_len; + let mut dest = owned + .data_ptr() + .offset((owned.len as usize - drop_left) as isize); + copy_and_advance( + &mut dest, + unsafe_slice(&insert_bytes, 0, insert_len as usize), + ); + copy_and_advance( + &mut dest, + unsafe_slice(buf, drop_right, buf.len() - drop_right), + ); + self.set_len(new_len); } } @@ -912,13 +981,16 @@ impl Tendril #[inline] pub unsafe fn unsafe_subtendril(&self, offset: u32, length: u32) -> Tendril { if length <= MAX_INLINE_LEN as u32 { - Tendril::inline(unsafe_slice(self.as_byte_slice(), - offset as usize, length as usize)) + Tendril::inline(unsafe_slice( + self.as_byte_slice(), + offset as usize, + length as usize, + )) } else { self.make_buf_shared(); self.incref(); let (buf, _, _) = self.assume_buf(); - Tendril::shared(buf, self.aux.get() + offset, length) + Tendril::shared(buf, self.aux() + offset, length) } } @@ -929,12 +1001,16 @@ impl Tendril pub unsafe fn unsafe_pop_front(&mut self, n: u32) { let new_len = self.len32() - n; if new_len <= MAX_INLINE_LEN as u32 { - *self = Tendril::inline(unsafe_slice(self.as_byte_slice(), - n as usize, new_len as usize)); + *self = Tendril::inline(unsafe_slice( + self.as_byte_slice(), + n as usize, + new_len as usize, + )); } else { self.make_buf_shared(); - self.aux.set(self.aux.get() + n); - self.len -= n; + self.set_aux(self.aux() + n); + let len = self.raw_len(); + self.set_len(len - n); } } @@ -945,11 +1021,11 @@ impl Tendril pub unsafe fn unsafe_pop_back(&mut self, n: u32) { let new_len = self.len32() - n; if new_len <= MAX_INLINE_LEN as u32 { - *self = Tendril::inline(unsafe_slice(self.as_byte_slice(), - 0, new_len as usize)); + *self = Tendril::inline(unsafe_slice(self.as_byte_slice(), 0, new_len as usize)); } else { self.make_buf_shared(); - self.len -= n; + let len = self.raw_len(); + self.set_len(len - n); } } @@ -963,10 +1039,10 @@ impl Tendril let p = self.ptr.get().get(); if p & 1 == 0 { let header = p as *mut Header; - (*header).cap = self.aux.get(); + (*header).cap = self.aux(); - self.ptr.set(NonZeroUsize::new(p | 1)); - self.aux.set(0); + self.ptr.set(NonZeroUsize::new_unchecked(p | 1)); + self.set_aux(0); } } @@ -988,8 +1064,8 @@ impl Tendril self.make_owned(); let mut buf = self.assume_buf().0; buf.grow(cap); - self.ptr.set(NonZeroUsize::new(buf.ptr as usize)); - self.aux.set(buf.cap); + self.ptr.set(NonZeroUsize::new_unchecked(buf.ptr as usize)); + self.set_aux(buf.cap); } #[inline(always)] @@ -1003,37 +1079,44 @@ impl Tendril let header = self.header(); let shared = (ptr & 1) == 1; let (cap, offset) = match shared { - true => ((*header).cap, self.aux.get()), - false => (self.aux.get(), 0), + true => ((*header).cap, self.aux()), + false => (self.aux(), 0), }; - (Buf32 { - ptr: header, - len: offset + self.len32(), - cap: cap, - }, shared, offset) + ( + Buf32 { + ptr: header, + len: offset + self.len32(), + cap: cap, + }, + shared, + offset, + ) } #[inline] unsafe fn inline(x: &[u8]) -> Tendril { let len = x.len(); - let mut t = Tendril { + let t = Tendril { ptr: Cell::new(inline_tag(len as u32)), - len: mem::uninitialized(), - aux: mem::uninitialized(), + buf: UnsafeCell::new(Buffer { inline: [0; 8] }), marker: PhantomData, refcount_marker: PhantomData, }; - ptr::copy_nonoverlapping(x.as_ptr(), &mut t.len as *mut u32 as *mut u8, len); + ptr::copy_nonoverlapping(x.as_ptr(), (*t.buf.get()).inline.as_mut_ptr(), len); t } #[inline] unsafe fn owned(x: Buf32>) -> Tendril { Tendril { - ptr: Cell::new(NonZeroUsize::new(x.ptr as usize)), - len: x.len, - aux: Cell::new(x.cap), + ptr: Cell::new(NonZeroUsize::new_unchecked(x.ptr as usize)), + buf: UnsafeCell::new(Buffer { + heap: Heap { + len: x.len, + aux: x.cap, + }, + }), marker: PhantomData, refcount_marker: PhantomData, } @@ -1051,9 +1134,10 @@ impl Tendril #[inline] unsafe fn shared(buf: Buf32>, off: u32, len: u32) -> Tendril { Tendril { - ptr: Cell::new(NonZeroUsize::new((buf.ptr as usize) | 1)), - len: len, - aux: Cell::new(off), + ptr: Cell::new(NonZeroUsize::new_unchecked((buf.ptr as usize) | 1)), + buf: UnsafeCell::new(Buffer { + heap: Heap { len, aux: off }, + }), marker: PhantomData, refcount_marker: PhantomData, } @@ -1064,13 +1148,13 @@ impl Tendril unsafe { match self.ptr.get().get() { EMPTY_TAG => &[], - n if n <= MAX_INLINE_LEN => { - slice::from_raw_parts(&self.len as *const u32 as *const u8, n) - } + n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked(..n), _ => { let (buf, _, offset) = self.assume_buf(); - copy_lifetime(self, unsafe_slice(buf.data(), - offset as usize, self.len32() as usize)) + copy_lifetime( + self, + unsafe_slice(buf.data(), offset as usize, self.len32() as usize), + ) } } } @@ -1083,9 +1167,7 @@ impl Tendril unsafe { match self.ptr.get().get() { EMPTY_TAG => &mut [], - n if n <= MAX_INLINE_LEN => { - slice::from_raw_parts_mut(&mut self.len as *mut u32 as *mut u8, n) - } + n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked_mut(..n), _ => { self.make_owned(); let (mut buf, _, offset) = self.assume_buf(); @@ -1095,26 +1177,39 @@ impl Tendril } } } + + unsafe fn raw_len(&self) -> u32 { + (*self.buf.get()).heap.len + } + + unsafe fn set_len(&mut self, len: u32) { + (*self.buf.get()).heap.len = len; + } + + unsafe fn aux(&self) -> u32 { + (*self.buf.get()).heap.aux + } + + unsafe fn set_aux(&self, aux: u32) { + (*self.buf.get()).heap.aux = aux; + } } impl Tendril - where F: fmt::SliceFormat, - A: Atomicity, +where + F: fmt::SliceFormat, + A: Atomicity, { /// Build a `Tendril` by copying a slice. #[inline] pub fn from_slice(x: &F::Slice) -> Tendril { - unsafe { - Tendril::from_byte_slice_without_validating(x.as_bytes()) - } + unsafe { Tendril::from_byte_slice_without_validating(x.as_bytes()) } } /// Push a slice onto the end of the `Tendril`. #[inline] pub fn push_slice(&mut self, x: &F::Slice) { - unsafe { - self.push_bytes_without_validating(x.as_bytes()) - } + unsafe { self.push_bytes_without_validating(x.as_bytes()) } } } @@ -1129,16 +1224,18 @@ impl Tendril /// and may be returned to a `Tendril` by `Tendril::from(self)`. #[derive(Clone)] pub struct SendTendril - where F: fmt::Format, +where + F: fmt::Format, { tendril: Tendril, } -unsafe impl Send for SendTendril where F: fmt::Format { } +unsafe impl Send for SendTendril where F: fmt::Format {} impl From> for SendTendril - where F: fmt::Format, - A: Atomicity, +where + F: fmt::Format, + A: Atomicity, { #[inline] fn from(tendril: Tendril) -> SendTendril { @@ -1147,14 +1244,13 @@ impl From> for SendTendril } impl From> for Tendril - where F: fmt::Format, - A: Atomicity, +where + F: fmt::Format, + A: Atomicity, { #[inline] fn from(send: SendTendril) -> Tendril { - unsafe { - mem::transmute(send.tendril) - } + unsafe { mem::transmute(send.tendril) } // header.refcount may have been initialised as an Atomic or a NonAtomic, but the value // will be the same (1) regardless, because the layout is defined. // Thus we don't need to fiddle about resetting it or anything like that. @@ -1162,33 +1258,38 @@ impl From> for Tendril } /// `Tendril`-related methods for Rust slices. -pub trait SliceExt: fmt::Slice where F: fmt::SliceFormat { +pub trait SliceExt: fmt::Slice +where + F: fmt::SliceFormat, +{ /// Make a `Tendril` from this slice. #[inline] fn to_tendril(&self) -> Tendril { - // It should be done thusly, but at the time of writing the defaults don't help inference: - //fn to_tendril(&self) -> Tendril - // where A: Atomicity, - //{ + // It should be done thusly, but at the time of writing the defaults don't help inference: + //fn to_tendril(&self) -> Tendril + // where A: Atomicity, + //{ Tendril::from_slice(self) } } -impl SliceExt for str { } -impl SliceExt for [u8] { } +impl SliceExt for str {} +impl SliceExt for [u8] {} impl Tendril - where F: for<'a> fmt::CharFormat<'a>, - A: Atomicity, +where + F: for<'a> fmt::CharFormat<'a>, + A: Atomicity, { /// Remove and return the first character, if any. #[inline] pub fn pop_front_char<'a>(&'a mut self) -> Option { unsafe { let next_char; // first char in iterator - let mut skip = 0; // number of bytes to skip, or 0 to clear + let mut skip = 0; // number of bytes to skip, or 0 to clear - { // <--+ + { + // <--+ // | Creating an iterator borrows self, so introduce a // +- scope to contain the borrow (that way we can mutate // self below, after this scope exits). @@ -1222,16 +1323,14 @@ impl Tendril /// /// Returns `None` on an empty string. #[inline] - pub fn pop_front_char_run<'a, C, R>(&'a mut self, mut classify: C) - -> Option<(Tendril, R)> - where C: FnMut(char) -> R, - R: PartialEq, + pub fn pop_front_char_run<'a, C, R>(&'a mut self, mut classify: C) -> Option<(Tendril, R)> + where + C: FnMut(char) -> R, + R: PartialEq, { let (class, first_mismatch); { - let mut chars = unsafe { - F::char_indices(self.as_byte_slice()) - }; + let mut chars = unsafe { F::char_indices(self.as_byte_slice()) }; let (_, first) = unwrap_or_return!(chars.next(), None); class = classify(first); first_mismatch = chars.find(|&(_, ch)| &classify(ch) != &class); @@ -1263,15 +1362,18 @@ impl Tendril /// Extension trait for `io::Read`. pub trait ReadExt: io::Read { fn read_to_tendril(&mut self, buf: &mut Tendril) -> io::Result - where A: Atomicity; + where + A: Atomicity; } impl ReadExt for T - where T: io::Read +where + T: io::Read, { /// Read all bytes until EOF. fn read_to_tendril(&mut self, buf: &mut Tendril) -> io::Result - where A: Atomicity, + where + A: Atomicity, { // Adapted from libstd/io/mod.rs. const DEFAULT_BUF_SIZE: u32 = 64 * 1024; @@ -1315,7 +1417,8 @@ impl ReadExt for T } impl io::Write for Tendril - where A: Atomicity, +where + A: Atomicity, { #[inline] fn write(&mut self, buf: &[u8]) -> io::Result { @@ -1337,7 +1440,8 @@ impl io::Write for Tendril #[cfg(feature = "encoding")] impl encoding::ByteWriter for Tendril - where A: Atomicity, +where + A: Atomicity, { #[inline] fn write_byte(&mut self, b: u8) { @@ -1356,8 +1460,9 @@ impl encoding::ByteWriter for Tendril } impl Tendril - where A: Atomicity, - F: fmt::SliceFormat +where + A: Atomicity, + F: fmt::SliceFormat, { /// Decode from some character encoding into UTF-8. /// @@ -1365,9 +1470,11 @@ impl Tendril /// for more information. #[inline] #[cfg(feature = "encoding")] - pub fn decode(&self, encoding: EncodingRef, trap: DecoderTrap) - -> Result, ::std::borrow::Cow<'static, str>> - { + pub fn decode( + &self, + encoding: EncodingRef, + trap: DecoderTrap, + ) -> Result, ::std::borrow::Cow<'static, str>> { let mut ret = Tendril::new(); encoding.decode_to(&*self, trap, &mut ret).map(|_| ret) } @@ -1380,19 +1487,18 @@ impl Tendril #[inline] pub unsafe fn push_uninitialized(&mut self, n: u32) { let new_len = self.len32().checked_add(n).expect(OFLOW); - if new_len <= MAX_INLINE_LEN as u32 - && self.ptr.get().get() <= MAX_INLINE_TAG - { + if new_len <= MAX_INLINE_LEN as u32 && self.ptr.get().get() <= MAX_INLINE_TAG { self.ptr.set(inline_tag(new_len)) } else { self.make_owned_with_capacity(new_len); - self.len = new_len; + self.set_len(new_len); } } } impl strfmt::Display for Tendril - where A: Atomicity, +where + A: Atomicity, { #[inline] fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result { @@ -1401,7 +1507,8 @@ impl strfmt::Display for Tendril } impl str::FromStr for Tendril - where A: Atomicity, +where + A: Atomicity, { type Err = (); @@ -1412,7 +1519,8 @@ impl str::FromStr for Tendril } impl strfmt::Write for Tendril - where A: Atomicity, +where + A: Atomicity, { #[inline] fn write_str(&mut self, s: &str) -> strfmt::Result { @@ -1423,7 +1531,8 @@ impl strfmt::Write for Tendril #[cfg(feature = "encoding")] impl encoding::StringWriter for Tendril - where A: Atomicity, +where + A: Atomicity, { #[inline] fn write_char(&mut self, c: char) { @@ -1442,7 +1551,8 @@ impl encoding::StringWriter for Tendril } impl Tendril - where A: Atomicity, +where + A: Atomicity, { /// Encode from UTF-8 into some other character encoding. /// @@ -1450,9 +1560,11 @@ impl Tendril /// for more information. #[inline] #[cfg(feature = "encoding")] - pub fn encode(&self, encoding: EncodingRef, trap: EncoderTrap) - -> Result, ::std::borrow::Cow<'static, str>> - { + pub fn encode( + &self, + encoding: EncodingRef, + trap: EncoderTrap, + ) -> Result, ::std::borrow::Cow<'static, str>> { let mut ret = Tendril::new(); encoding.encode_to(&*self, trap, &mut ret).map(|_| ret) } @@ -1461,14 +1573,7 @@ impl Tendril #[inline] pub fn push_char(&mut self, c: char) { unsafe { - let mut utf_8: [u8; 4] = mem::uninitialized(); - let bytes_written = { - let mut buffer = &mut utf_8[..]; - write!(buffer, "{}", c).ok().expect("Tendril::push_char: internal error"); - debug_assert!(buffer.len() <= 4); - 4 - buffer.len() - }; - self.push_bytes_without_validating(unsafe_slice(&utf_8, 0, bytes_written)); + self.push_bytes_without_validating(c.encode_utf8(&mut [0_u8; 4]).as_bytes()); } } @@ -1498,10 +1603,10 @@ macro_rules! format_tendril { ($($arg:tt)*) => ($crate::StrTendril::format(format_args!($($arg)*))) } - impl<'a, F, A> From<&'a F::Slice> for Tendril - where F: fmt::SliceFormat, - A: Atomicity, +where + F: fmt::SliceFormat, + A: Atomicity, { #[inline] fn from(input: &F::Slice) -> Tendril { @@ -1510,7 +1615,8 @@ impl<'a, F, A> From<&'a F::Slice> for Tendril } impl From for Tendril - where A: Atomicity, +where + A: Atomicity, { #[inline] fn from(input: String) -> Tendril { @@ -1519,8 +1625,9 @@ impl From for Tendril } impl AsRef for Tendril - where F: fmt::SliceFormat, - A: Atomicity, +where + F: fmt::SliceFormat, + A: Atomicity, { #[inline] fn as_ref(&self) -> &F::Slice { @@ -1529,7 +1636,8 @@ impl AsRef for Tendril } impl From> for String - where A: Atomicity, +where + A: Atomicity, { #[inline] fn from(input: Tendril) -> String { @@ -1538,7 +1646,8 @@ impl From> for String } impl<'a, A> From<&'a Tendril> for String - where A: Atomicity, +where + A: Atomicity, { #[inline] fn from(input: &'a Tendril) -> String { @@ -1546,20 +1655,20 @@ impl<'a, A> From<&'a Tendril> for String } } - #[cfg(all(test, feature = "bench"))] -#[path="bench.rs"] +#[path = "bench.rs"] mod bench; #[cfg(test)] mod test { - use super::{Tendril, ByteTendril, StrTendril, SendTendril, - ReadExt, SliceExt, Header, NonAtomic, Atomic}; + use super::{ + Atomic, ByteTendril, Header, NonAtomic, ReadExt, SendTendril, SliceExt, StrTendril, Tendril, + }; use fmt; use std::iter; use std::thread; - fn assert_send() { } + fn assert_send() {} #[test] fn smoke_test() { @@ -1581,8 +1690,13 @@ mod test { } let compiler_uses_inline_drop_flags = mem::size_of::() > 0; - let correct = mem::size_of::<*const ()>() + 8 + - if compiler_uses_inline_drop_flags { 1 } else { 0 }; + let correct = mem::size_of::<*const ()>() + + 8 + + if compiler_uses_inline_drop_flags { + 1 + } else { + 0 + }; assert_eq!(correct, mem::size_of::()); assert_eq!(correct, mem::size_of::()); @@ -1590,9 +1704,14 @@ mod test { assert_eq!(correct, mem::size_of::>()); assert_eq!(correct, mem::size_of::>()); - let correct_header = mem::size_of::<*const ()>() + 4; - assert_eq!(correct_header, mem::size_of::>()); - assert_eq!(correct_header, mem::size_of::>()); + assert_eq!( + mem::size_of::<*const ()>() * 2, + mem::size_of::>(), + ); + assert_eq!( + mem::size_of::>(), + mem::size_of::>(), + ); } #[test] @@ -1602,7 +1721,10 @@ mod test { assert!(StrTendril::try_from_byte_slice(b"\xEA\x99\xFF").is_err()); assert!(StrTendril::try_from_byte_slice(b"\xEA\x99").is_err()); assert!(StrTendril::try_from_byte_slice(b"\xEA\x99\xAE\xEA").is_err()); - assert_eq!("\u{a66e}", &*StrTendril::try_from_byte_slice(b"\xEA\x99\xAE").unwrap()); + assert_eq!( + "\u{a66e}", + &*StrTendril::try_from_byte_slice(b"\xEA\x99\xAE").unwrap() + ); let mut t = StrTendril::new(); assert!(t.try_push_bytes(b"\xEA\x99").is_err()); @@ -1647,17 +1769,25 @@ mod test { #[test] fn format_debug() { - assert_eq!(r#"Tendril(inline: "foobar")"#, - &*format!("{:?}", "foobar".to_tendril())); - assert_eq!(r#"Tendril(inline: [102, 111, 111, 98, 97, 114])"#, - &*format!("{:?}", b"foobar".to_tendril())); + assert_eq!( + r#"Tendril(inline: "foobar")"#, + &*format!("{:?}", "foobar".to_tendril()) + ); + assert_eq!( + r#"Tendril(inline: [102, 111, 111, 98, 97, 114])"#, + &*format!("{:?}", b"foobar".to_tendril()) + ); let t = "anextralongstring".to_tendril(); - assert_eq!(r#"Tendril(owned: "anextralongstring")"#, - &*format!("{:?}", t)); + assert_eq!( + r#"Tendril(owned: "anextralongstring")"#, + &*format!("{:?}", t) + ); let _ = t.clone(); - assert_eq!(r#"Tendril(shared: "anextralongstring")"#, - &*format!("{:?}", t)); + assert_eq!( + r#"Tendril(shared: "anextralongstring")"#, + &*format!("{:?}", t) + ); } #[test] @@ -1671,12 +1801,18 @@ mod test { t.pop_back(1); assert_eq!("o-ba".to_tendril(), t); - assert_eq!("foo".to_tendril(), - "foo-a-longer-string-bar-baz".to_tendril().subtendril(0, 3)); - assert_eq!("oo-a-".to_tendril(), - "foo-a-longer-string-bar-baz".to_tendril().subtendril(1, 5)); - assert_eq!("bar".to_tendril(), - "foo-a-longer-string-bar-baz".to_tendril().subtendril(20, 3)); + assert_eq!( + "foo".to_tendril(), + "foo-a-longer-string-bar-baz".to_tendril().subtendril(0, 3) + ); + assert_eq!( + "oo-a-".to_tendril(), + "foo-a-longer-string-bar-baz".to_tendril().subtendril(1, 5) + ); + assert_eq!( + "bar".to_tendril(), + "foo-a-longer-string-bar-baz".to_tendril().subtendril(20, 3) + ); let mut t = "another rather long string".to_tendril(); t.pop_front(2); @@ -1718,20 +1854,32 @@ mod test { #[test] fn conversion() { - assert_eq!(&[0x66, 0x6F, 0x6F].to_tendril(), "foo".to_tendril().as_bytes()); - assert_eq!([0x66, 0x6F, 0x6F].to_tendril(), "foo".to_tendril().into_bytes()); + assert_eq!( + &[0x66, 0x6F, 0x6F].to_tendril(), + "foo".to_tendril().as_bytes() + ); + assert_eq!( + [0x66, 0x6F, 0x6F].to_tendril(), + "foo".to_tendril().into_bytes() + ); let ascii: Tendril = b"hello".to_tendril().try_reinterpret().unwrap(); assert_eq!(&"hello".to_tendril(), ascii.as_superset()); assert_eq!("hello".to_tendril(), ascii.clone().into_superset()); - assert!(b"\xFF".to_tendril().try_reinterpret::().is_err()); + assert!(b"\xFF" + .to_tendril() + .try_reinterpret::() + .is_err()); let t = "hello".to_tendril(); let ascii: &Tendril = t.try_as_subset().unwrap(); assert_eq!(b"hello", &**ascii.as_bytes()); - assert!("ő".to_tendril().try_reinterpret_view::().is_err()); + assert!("ő" + .to_tendril() + .try_reinterpret_view::() + .is_err()); assert!("ő".to_tendril().try_as_subset::().is_err()); let ascii: Tendril = "hello".to_tendril().try_into_subset().unwrap(); @@ -1771,12 +1919,10 @@ mod test { assert!(Tendril::::try_from_byte_slice(b"\xED\xB2\xA9").is_ok()); assert!(Tendril::::try_from_byte_slice(b"\xED\xA0\xBD\xED\xB2\xA9").is_err()); - let t: Tendril - = Tendril::try_from_byte_slice(b"\xED\xA0\xBD\xEA\x99\xAE").unwrap(); - assert!(b"\xED\xA0\xBD".to_tendril().try_reinterpret().unwrap() - == t.subtendril(0, 3)); - assert!(b"\xEA\x99\xAE".to_tendril().try_reinterpret().unwrap() - == t.subtendril(3, 3)); + let t: Tendril = + Tendril::try_from_byte_slice(b"\xED\xA0\xBD\xEA\x99\xAE").unwrap(); + assert!(b"\xED\xA0\xBD".to_tendril().try_reinterpret().unwrap() == t.subtendril(0, 3)); + assert!(b"\xEA\x99\xAE".to_tendril().try_reinterpret().unwrap() == t.subtendril(3, 3)); assert!(t.try_reinterpret_view::().is_err()); assert!(t.try_subtendril(0, 1).is_err()); @@ -1871,10 +2017,16 @@ mod test { t[4] = 0xEA; t[5] = 0x99; t[6] = 0xAE; - assert_eq!("xyŋ\u{a66e}", &**t.try_reinterpret_view::().unwrap()); + assert_eq!( + "xyŋ\u{a66e}", + &**t.try_reinterpret_view::().unwrap() + ); t.push_uninitialized(20); t.pop_back(20); - assert_eq!("xyŋ\u{a66e}", &**t.try_reinterpret_view::().unwrap()); + assert_eq!( + "xyŋ\u{a66e}", + &**t.try_reinterpret_view::().unwrap() + ); } } @@ -1909,13 +2061,17 @@ mod test { use encoding::{all, EncoderTrap}; let t = "안녕하세요 러스트".to_tendril(); - assert_eq!(b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae", - &*t.encode(all::WINDOWS_949, EncoderTrap::Strict).unwrap()); + assert_eq!( + b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae", + &*t.encode(all::WINDOWS_949, EncoderTrap::Strict).unwrap() + ); let t = "Энергия пробуждения ия-я-я! \u{a66e}".to_tendril(); - assert_eq!(b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\ + assert_eq!( + b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\ \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21 ?", - &*t.encode(all::KOI8_U, EncoderTrap::Replace).unwrap()); + &*t.encode(all::KOI8_U, EncoderTrap::Replace).unwrap() + ); let t = "\u{1f4a9}".to_tendril(); assert!(t.encode(all::WINDOWS_1252, EncoderTrap::Strict).is_err()); @@ -1927,21 +2083,29 @@ mod test { use encoding::{all, DecoderTrap}; let t = b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\ - \xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae".to_tendril(); - assert_eq!("안녕하세요 러스트", - &*t.decode(all::WINDOWS_949, DecoderTrap::Strict).unwrap()); + \xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae" + .to_tendril(); + assert_eq!( + "안녕하세요 러스트", + &*t.decode(all::WINDOWS_949, DecoderTrap::Strict).unwrap() + ); let t = b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\ - \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21".to_tendril(); - assert_eq!("Энергия пробуждения ия-я-я!", - &*t.decode(all::KOI8_U, DecoderTrap::Replace).unwrap()); + \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21" + .to_tendril(); + assert_eq!( + "Энергия пробуждения ия-я-я!", + &*t.decode(all::KOI8_U, DecoderTrap::Replace).unwrap() + ); let t = b"x \xff y".to_tendril(); assert!(t.decode(all::UTF_8, DecoderTrap::Strict).is_err()); let t = b"x \xff y".to_tendril(); - assert_eq!("x \u{fffd} y", - &*t.decode(all::UTF_8, DecoderTrap::Replace).unwrap()); + assert_eq!( + "x \u{fffd} y", + &*t.decode(all::UTF_8, DecoderTrap::Replace).unwrap() + ); } #[test] @@ -1957,10 +2121,8 @@ mod test { assert_eq!(None, t.pop_front_char()); let mut t = mk(b" \t xyz"); - assert!(Some((mk(b" \t "), true)) - == t.pop_front_char_run(char::is_whitespace)); - assert!(Some((mk(b"xyz"), false)) - == t.pop_front_char_run(char::is_whitespace)); + assert!(Some((mk(b" \t "), true)) == t.pop_front_char_run(char::is_whitespace)); + assert!(Some((mk(b"xyz"), false)) == t.pop_front_char_run(char::is_whitespace)); assert!(t.pop_front_char_run(char::is_whitespace).is_none()); let mut t = Tendril::::new(); @@ -1983,10 +2145,8 @@ mod test { assert_eq!(None, t.pop_front_char()); let mut t = mk(b" \t \xfe\xa7z"); - assert!(Some((mk(b" \t "), true)) - == t.pop_front_char_run(char::is_whitespace)); - assert!(Some((mk(b"\xfe\xa7z"), false)) - == t.pop_front_char_run(char::is_whitespace)); + assert!(Some((mk(b" \t "), true)) == t.pop_front_char_run(char::is_whitespace)); + assert!(Some((mk(b"\xfe\xa7z"), false)) == t.pop_front_char_run(char::is_whitespace)); assert!(t.pop_front_char_run(char::is_whitespace).is_none()); let mut t = Tendril::::new(); @@ -2003,7 +2163,10 @@ mod test { #[test] fn format() { assert_eq!("", &*format_tendril!("")); - assert_eq!("two and two make 4", &*format_tendril!("two and two make {}", 2+2)); + assert_eq!( + "two and two make 4", + &*format_tendril!("two and two make {}", 2 + 2) + ); } #[test] @@ -2067,7 +2230,6 @@ mod test { assert!(t.try_pop_back(5).is_err()); assert!(t.try_pop_back(500).is_err()); - let mut t = "abcd".to_tendril(); assert!(t.try_pop_front(1).is_ok()); assert!(t.try_pop_front(4).is_err()); @@ -2079,10 +2241,26 @@ mod test { #[test] fn compare() { - for &a in &["indiscretions", "validity", "hallucinogenics", "timelessness", - "original", "microcosms", "boilers", "mammoth"] { - for &b in &["intrepidly", "frigid", "spa", "cardigans", - "guileful", "evaporated", "unenthusiastic", "legitimate"] { + for &a in &[ + "indiscretions", + "validity", + "hallucinogenics", + "timelessness", + "original", + "microcosms", + "boilers", + "mammoth", + ] { + for &b in &[ + "intrepidly", + "frigid", + "spa", + "cardigans", + "guileful", + "evaporated", + "unenthusiastic", + "legitimate", + ] { let ta = a.to_tendril(); let tb = b.to_tendril(); @@ -2108,9 +2286,17 @@ mod test { assert_eq!("Hello", &*t); t.extend(&[", ".to_tendril(), "world".to_tendril(), "!".to_tendril()]); assert_eq!("Hello, world!", &*t); - assert_eq!("Hello, world!", &*["Hello".to_tendril(), ", ".to_tendril(), - "world".to_tendril(), "!".to_tendril()] - .iter().collect::()); + assert_eq!( + "Hello, world!", + &*[ + "Hello".to_tendril(), + ", ".to_tendril(), + "world".to_tendril(), + "!".to_tendril() + ] + .iter() + .collect::() + ); // &str let mut t = "Hello".to_tendril(); @@ -2118,18 +2304,36 @@ mod test { assert_eq!("Hello", &*t); t.extend([", ", "world", "!"].iter().map(|&s| s)); assert_eq!("Hello, world!", &*t); - assert_eq!("Hello, world!", &*["Hello", ", ", "world", "!"] - .iter().map(|&s| s).collect::()); + assert_eq!( + "Hello, world!", + &*["Hello", ", ", "world", "!"] + .iter() + .map(|&s| s) + .collect::() + ); // &[u8] let mut t = b"Hello".to_tendril(); t.extend(None::<&[u8]>.into_iter()); assert_eq!(b"Hello", &*t); - t.extend([b", ".as_ref(), b"world".as_ref(), b"!".as_ref()].iter().map(|&s| s)); + t.extend( + [b", ".as_ref(), b"world".as_ref(), b"!".as_ref()] + .iter() + .map(|&s| s), + ); assert_eq!(b"Hello, world!", &*t); - assert_eq!(b"Hello, world!", &*[b"Hello".as_ref(), b", ".as_ref(), - b"world".as_ref(), b"!".as_ref()] - .iter().map(|&s| s).collect::()); + assert_eq!( + b"Hello, world!", + &*[ + b"Hello".as_ref(), + b", ".as_ref(), + b"world".as_ref(), + b"!".as_ref() + ] + .iter() + .map(|&s| s) + .collect::() + ); let string = "the quick brown fox jumps over the lazy dog"; let string_expected = string.to_tendril(); @@ -2171,6 +2375,7 @@ mod test { } #[test] + #[cfg_attr(miri, ignore)] // slow fn read() { fn check(x: &[u8]) { use std::io::Cursor; @@ -2217,7 +2422,9 @@ mod test { assert_eq!("this is a string extended", &*t); assert!(t.as_ptr() as usize != sp); assert!(!t.is_shared()); - }).join().unwrap(); + }) + .join() + .unwrap(); assert!(s.is_shared()); assert_eq!("this is a string", &*s); } @@ -2232,10 +2439,22 @@ mod test { let s = StrTendril::from(s2); assert!(!s.is_shared()); assert_eq!("this is a string", &*s); - }).join().unwrap(); + }) + .join() + .unwrap(); assert_eq!("this is a string", &*t); } + /// https://github.com/servo/tendril/issues/58 + #[test] + fn issue_58() { + let data = "

Hello!

, World!"; + let s: Tendril = data.into(); + assert_eq!(&*s, data); + let s: Tendril = s.into_send().into(); + assert_eq!(&*s, data); + } + #[test] fn inline_send() { let s = "x".to_tendril(); @@ -2245,7 +2464,9 @@ mod test { let s = StrTendril::from(s2); assert!(!s.is_shared()); assert_eq!("x", &*s); - }).join().unwrap(); + }) + .join() + .unwrap(); assert_eq!("x", &*t); } } diff --git a/src/utf8_decode.rs b/src/utf8_decode.rs index d3e9e32..b682d57 100644 --- a/src/utf8_decode.rs +++ b/src/utf8_decode.rs @@ -1,21 +1,26 @@ // Licensed under the Apache License, Version 2.0 or the MIT license -// , at your +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// , at your // option. This file may not be copied, modified, or distributed // except according to those terms. use fmt; -use tendril::{Tendril, Atomicity}; +use tendril::{Atomicity, Tendril}; use utf8; pub struct IncompleteUtf8(utf8::Incomplete); -impl Tendril where A: Atomicity { +impl Tendril +where + A: Atomicity, +{ pub fn decode_utf8_lossy(mut self, mut push_utf8: F) -> Option - where F: FnMut(Tendril) { + where + F: FnMut(Tendril), + { loop { if self.is_empty() { - return None + return None; } let unborrowed_result = match utf8::decode(&self) { Ok(s) => { @@ -23,12 +28,22 @@ impl Tendril where A: Atomicity { debug_assert!(s.len() == self.len()); Ok(()) } - Err(utf8::DecodeError::Invalid { valid_prefix, invalid_sequence, .. }) => { + Err(utf8::DecodeError::Invalid { + valid_prefix, + invalid_sequence, + .. + }) => { debug_assert!(valid_prefix.as_ptr() == self.as_ptr()); debug_assert!(valid_prefix.len() <= self.len()); - Err((valid_prefix.len(), Err(valid_prefix.len() + invalid_sequence.len()))) + Err(( + valid_prefix.len(), + Err(valid_prefix.len() + invalid_sequence.len()), + )) } - Err(utf8::DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => { + Err(utf8::DecodeError::Incomplete { + valid_prefix, + incomplete_suffix, + }) => { debug_assert!(valid_prefix.as_ptr() == self.as_ptr()); debug_assert!(valid_prefix.len() <= self.len()); Err((valid_prefix.len(), Ok(incomplete_suffix))) @@ -36,17 +51,13 @@ impl Tendril where A: Atomicity { }; match unborrowed_result { Ok(()) => { - unsafe { - push_utf8(self.reinterpret_without_validating()) - } - return None + unsafe { push_utf8(self.reinterpret_without_validating()) } + return None; } Err((valid_len, and_then)) => { if valid_len > 0 { let subtendril = self.subtendril(0, valid_len as u32); - unsafe { - push_utf8(subtendril.reinterpret_without_validating()) - } + unsafe { push_utf8(subtendril.reinterpret_without_validating()) } } match and_then { Ok(incomplete) => return Some(IncompleteUtf8(incomplete)), @@ -62,14 +73,22 @@ impl Tendril where A: Atomicity { } impl IncompleteUtf8 { - pub fn try_complete(&mut self, mut input: Tendril, mut push_utf8: F) - -> Result, ()> - where A: Atomicity, F: FnMut(Tendril) { + pub fn try_complete( + &mut self, + mut input: Tendril, + mut push_utf8: F, + ) -> Result, ()> + where + A: Atomicity, + F: FnMut(Tendril), + { let resume_at; match self.0.try_complete(&input) { None => return Err(()), Some((result, rest)) => { - push_utf8(Tendril::from_slice(result.unwrap_or(utf8::REPLACEMENT_CHARACTER))); + push_utf8(Tendril::from_slice( + result.unwrap_or(utf8::REPLACEMENT_CHARACTER), + )); resume_at = input.len() - rest.len(); } } diff --git a/src/util.rs b/src/util.rs index af489e7..28c55c1 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,11 +1,11 @@ // Licensed under the Apache License, Version 2.0 or the MIT license -// , at your +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -use std::{slice, ptr}; use std::mem; +use std::{ptr, slice}; #[inline(always)] pub unsafe fn unsafe_slice<'a>(buf: &'a [u8], start: usize, new_len: usize) -> &'a [u8] { @@ -15,7 +15,11 @@ pub unsafe fn unsafe_slice<'a>(buf: &'a [u8], start: usize, new_len: usize) -> & } #[inline(always)] -pub unsafe fn unsafe_slice_mut<'a>(buf: &'a mut [u8], start: usize, new_len: usize) -> &'a mut [u8] { +pub unsafe fn unsafe_slice_mut<'a>( + buf: &'a mut [u8], + start: usize, + new_len: usize, +) -> &'a mut [u8] { debug_assert!(start <= buf.len()); debug_assert!(new_len <= (buf.len() - start)); slice::from_raw_parts_mut(buf.as_mut_ptr().offset(start as isize), new_len) @@ -28,30 +32,14 @@ pub unsafe fn copy_and_advance(dest: &mut *mut u8, src: &[u8]) { } #[inline(always)] -pub unsafe fn copy_lifetime_mut<'a, S: ?Sized, T: ?Sized + 'a> - (_ptr: &'a mut S, ptr: &mut T) -> &'a mut T { +pub unsafe fn copy_lifetime_mut<'a, S: ?Sized, T: ?Sized + 'a>( + _ptr: &'a mut S, + ptr: &mut T, +) -> &'a mut T { mem::transmute(ptr) } - #[inline(always)] -pub unsafe fn copy_lifetime<'a, S: ?Sized, T: ?Sized + 'a> - (_ptr: &'a S, ptr: &T) -> &'a T { +pub unsafe fn copy_lifetime<'a, S: ?Sized, T: ?Sized + 'a>(_ptr: &'a S, ptr: &T) -> &'a T { mem::transmute(ptr) } - -#[derive(Copy, Clone)] -pub struct NonZeroUsize(&'static u8); - -impl NonZeroUsize { - #[inline] - pub unsafe fn new(value: usize) -> Self { - debug_assert!(value != 0); - NonZeroUsize(&*(value as *const u8)) - } - - #[inline] - pub fn get(self) -> usize { - self.0 as *const u8 as usize - } -}