[libc] Add memset and bzero implementations

Summary: This patch adds general purpose `memset` and `bzero` implementations.

Reviewers: sivachandra, abrachet

Subscribers: mgorny, tschuett, ecnelises, libc-commits, courbet

Tags: #libc-project

Differential Revision: https://reviews.llvm.org/D80010
This commit is contained in:
Guillaume Chatelet 2020-05-15 14:27:35 +00:00
parent e71c537a48
commit 6ca54e0114
14 changed files with 439 additions and 57 deletions

View File

@ -23,9 +23,11 @@ add_entrypoint_library(
libc.src.stdlib.abort
# string.h entrypoints
libc.src.string.bzero
libc.src.string.memcpy
libc.src.string.strcpy
libc.src.string.memset
libc.src.string.strcat
libc.src.string.strcpy
libc.src.string.strlen
# sys/mman.h entrypoints

View File

@ -34,15 +34,43 @@ add_entrypoint_object(
libc.include.string
)
# Registers one build variant (`impl_name`) of the libc function `name`.
#
# Usage:
#   add_implementation(<name> <impl_name>
#     [MARCH <arch>]
#     [REQUIRE <feature>...] [REJECT <feature>...]
#     [SRCS <file>...] [HDRS <file>...]
#     [DEPENDS <target>...] [COMPILE_OPTIONS <flag>...])
#
# What it does, in order:
#   1. Asks `compute_flags` for the flags matching MARCH/REQUIRE/REJECT.
#   2. Declares the entrypoint object `impl_name`; the computed flags are
#      appended after the caller's COMPILE_OPTIONS.
#   3. Stores the REQUIRE list on the target as REQUIRE_CPU_FEATURES.
#   4. Appends the fully qualified target name to the global property
#      `<name>_implementations` so tests can enumerate every variant.
function(add_implementation name impl_name)
  cmake_parse_arguments(
    "IMPL"
    ""                                                  # Options
    "MARCH"                                             # Single value keywords
    "REQUIRE;REJECT;SRCS;HDRS;DEPENDS;COMPILE_OPTIONS"  # Multi value keywords
    ${ARGN}
  )

  compute_flags(impl_flags
    MARCH ${IMPL_MARCH}
    REQUIRE ${IMPL_REQUIRE}
    REJECT ${IMPL_REJECT}
  )

  add_entrypoint_object(${impl_name}
    SRCS ${IMPL_SRCS}
    HDRS ${IMPL_HDRS}
    DEPENDS ${IMPL_DEPENDS}
    COMPILE_OPTIONS ${IMPL_COMPILE_OPTIONS} ${impl_flags}
  )

  get_fq_target_name(${impl_name} fq_target_name)
  # Quoted so the whole REQUIRE list is stored as a single property value.
  set_target_properties(${fq_target_name}
    PROPERTIES
      REQUIRE_CPU_FEATURES "${IMPL_REQUIRE}"
  )
  set_property(GLOBAL APPEND
    PROPERTY "${name}_implementations" "${fq_target_name}"
  )
endfunction()
# ------------------------------------------------------------------------------
# memcpy
# ------------------------------------------------------------------------------
# include the relevant architecture specific implementations
if(${LIBC_TARGET_MACHINE} STREQUAL "x86_64")
set(LIBC_MEMCPY_IMPL_FOLDER "x86")
set(LIBC_STRING_TARGET_FOLDER "x86")
else()
set(LIBC_MEMCPY_IMPL_FOLDER ${LIBC_TARGET_MACHINE})
set(LIBC_STRING_TARGET_FOLDER ${LIBC_TARGET_MACHINE})
endif()
add_gen_header(
@ -52,30 +80,13 @@ add_gen_header(
GEN_HDR
memcpy_arch_specific.h
PARAMS
memcpy_arch_specific=${LIBC_MEMCPY_IMPL_FOLDER}/memcpy_arch_specific.h.inc
memcpy_arch_specific=${LIBC_STRING_TARGET_FOLDER}/memcpy_arch_specific.h.inc
DATA_FILES
${LIBC_MEMCPY_IMPL_FOLDER}/memcpy_arch_specific.h.inc
${LIBC_STRING_TARGET_FOLDER}/memcpy_arch_specific.h.inc
)
# Helper to define an implementation of memcpy.
# - Computes flags to satisfy required/rejected features and arch,
# - Declares an entry point,
# - Attach the REQUIRE_CPU_FEATURES property to the target,
# - Add the target to `memcpy_implementations` global property for tests.
function(add_memcpy memcpy_name)
cmake_parse_arguments(
"ADD_MEMCPY"
"" # Optional arguments
"MARCH" # Single value arguments
"REQUIRE;REJECT" # Multi value arguments
${ARGN})
compute_flags(flags
MARCH ${ADD_MEMCPY_MARCH}
REQUIRE ${ADD_MEMCPY_REQUIRE}
REJECT ${ADD_MEMCPY_REJECT}
)
add_entrypoint_object(
${memcpy_name}
add_implementation(memcpy ${memcpy_name}
SRCS ${LIBC_SOURCE_DIR}/src/string/memcpy.cpp
HDRS ${LIBC_SOURCE_DIR}/src/string/memcpy.h
DEPENDS
@ -84,14 +95,53 @@ function(add_memcpy memcpy_name)
libc.include.string
COMPILE_OPTIONS
-fno-builtin-memcpy
${flags}
${ARGN}
)
get_fq_target_name(${memcpy_name} fq_target_name)
set_target_properties(${fq_target_name} PROPERTIES REQUIRE_CPU_FEATURES "${ADD_MEMCPY_REQUIRE}")
get_property(all GLOBAL PROPERTY memcpy_implementations)
list(APPEND all ${memcpy_name})
set_property(GLOBAL PROPERTY memcpy_implementations "${all}")
endfunction()
include(${LIBC_MEMCPY_IMPL_FOLDER}/CMakeLists.txt)
add_memcpy(memcpy MARCH native)
# ------------------------------------------------------------------------------
# memset
# ------------------------------------------------------------------------------
# Declares a `memset` variant named `memset_name`.
# Any extra arguments (e.g. MARCH, REQUIRE, REJECT) are forwarded unchanged to
# `add_implementation`, after the fixed COMPILE_OPTIONS list.
function(add_memset memset_name)
  add_implementation(memset ${memset_name}
    SRCS
      ${LIBC_SOURCE_DIR}/src/string/memset.cpp
    HDRS
      ${LIBC_SOURCE_DIR}/src/string/memset.h
    DEPENDS
      .memory_utils.memory_utils
      libc.include.string
    COMPILE_OPTIONS
      -fno-builtin-memset
      ${ARGN}
  )
endfunction()

# The `memset` entrypoint itself, declared with MARCH native.
add_memset(memset MARCH native)
# ------------------------------------------------------------------------------
# bzero
# ------------------------------------------------------------------------------
# Declares a `bzero` variant named `bzero_name`.
# Any extra arguments (e.g. MARCH, REQUIRE, REJECT) are forwarded unchanged to
# `add_implementation`, after the fixed COMPILE_OPTIONS list.
function(add_bzero bzero_name)
  add_implementation(bzero ${bzero_name}
    SRCS
      ${LIBC_SOURCE_DIR}/src/string/bzero.cpp
    HDRS
      ${LIBC_SOURCE_DIR}/src/string/bzero.h
    DEPENDS
      .memory_utils.memory_utils
      libc.include.string
    COMPILE_OPTIONS
      -fno-builtin-memset
      -fno-builtin-bzero
      ${ARGN}
  )
endfunction()

# The `bzero` entrypoint itself, declared with MARCH native.
add_bzero(bzero MARCH native)
# ------------------------------------------------------------------------------
# Add all other relevant implementations for the native target.
# ------------------------------------------------------------------------------
include(${LIBC_STRING_TARGET_FOLDER}/CMakeLists.txt)

19
libc/src/string/bzero.cpp Normal file
View File

@ -0,0 +1,19 @@
//===-- Implementation of bzero -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/string/bzero.h"
#include "src/__support/common.h"
#include "src/string/memory_utils/memset_utils.h"
namespace __llvm_libc {
// Public `bzero` entrypoint: clears `count` bytes starting at `ptr` by
// delegating to the shared memset routine with a fill value of zero.
void LLVM_LIBC_ENTRYPOINT(bzero)(void *ptr, size_t count) {
  char *const bytes = reinterpret_cast<char *>(ptr);
  GeneralPurposeMemset(bytes, 0, count);
}
} // namespace __llvm_libc

20
libc/src/string/bzero.h Normal file
View File

@ -0,0 +1,20 @@
//===-- Implementation header for bzero -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_BZERO_H
#define LLVM_LIBC_SRC_STRING_BZERO_H
#include "include/string.h"
namespace __llvm_libc {
// Sets the `count` bytes starting at `ptr` to zero.
void bzero(void *ptr, size_t count);
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_STRING_BZERO_H

View File

@ -15,6 +15,7 @@ add_header_library(
HDRS
utils.h
memcpy_utils.h
memset_utils.h
DEPENDS
.cacheline_size
)

View File

@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_MEMORY_UTILS_MEMCPY_UTILS_H
#define LLVM_LIBC_SRC_MEMORY_UTILS_MEMCPY_UTILS_H
#ifndef LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_UTILS_H
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_UTILS_H
#include "src/string/memory_utils/utils.h"
#include <stddef.h> // size_t
@ -99,4 +99,4 @@ static void CopyAligned(char *__restrict dst, const char *__restrict src,
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_MEMORY_UTILS_MEMCPY_UTILS_H
#endif // LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_UTILS_H

View File

@ -0,0 +1,131 @@
//===-- Memset utils --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H
#include "src/string/memory_utils/utils.h"
#include <stddef.h> // size_t
namespace __llvm_libc {
// Sets `kBlockSize` bytes starting from `dst` to `value`.
template <size_t kBlockSize>
static void SetBlock(char *dst, unsigned value) {
  // Nothing forbids the compiler from lowering this builtin to a real call to
  // `memset`, which would recurse into this very implementation. That does
  // not happen in practice, but the call should become
  // `__builtin_memset_inline` as soon as clang provides it.
  __builtin_memset(dst, value, kBlockSize);
}
// Sets `kBlockSize` bytes from `dst + count - kBlockSize` to `value`.
// Precondition: `count >= kBlockSize`.
template <size_t kBlockSize>
static void SetLastBlock(char *dst, unsigned value, size_t count) {
  // Address of the final `kBlockSize` bytes of the `count`-byte buffer.
  char *const tail = dst + count - kBlockSize;
  SetBlock<kBlockSize>(tail, value);
}
// Sets `kBlockSize` bytes twice with an overlap between the two.
//
// [1234567812345678123]
// [__XXXXXXXXXXXXXX___]
// [__XXXXXXXX_________]
// [________XXXXXXXX___]
//
// Precondition: `count >= kBlockSize && count <= 2 * kBlockSize`.
template <size_t kBlockSize>
static void SetBlockOverlap(char *dst, unsigned value, size_t count) {
  // One store anchored at the front of the buffer...
  SetBlock<kBlockSize>(dst, value);
  // ...and one anchored at its end. The two may cover some of the same bytes.
  SetBlock<kBlockSize>(dst + count - kBlockSize, value);
}
// Sets `count` bytes by blocks of `kBlockSize` bytes.
// Sets at the start and end of the buffer are unaligned.
// Sets in the middle of the buffer are aligned to `kBlockSize`.
//
// e.g. with
// [12345678123456781234567812345678]
// [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___]
// [__XXXXXXXX______________________]
// [________XXXXXXXX________________]
// [________________XXXXXXXX________]
// [_____________________XXXXXXXX___]
//
// Precondition: `count > 2 * kBlockSize` for efficiency.
// `count >= kBlockSize` for correctness.
template <size_t kBlockSize>
static void SetAlignedBlocks(char *dst, unsigned value, size_t count) {
  // Leading block, written at `dst` exactly as given.
  SetBlock<kBlockSize>(dst, value);

  // Middle blocks. `cursor` starts just past the leading partial block;
  // this assumes `offset_from_last_aligned` (utils.h) yields the offset of
  // `dst` from the previous `kBlockSize` boundary -- TODO confirm.
  size_t cursor = kBlockSize - offset_from_last_aligned<kBlockSize>(dst);
  while (cursor + kBlockSize < count) {
    SetBlock<kBlockSize>(dst + cursor, value);
    cursor += kBlockSize;
  }

  // Trailing block, anchored at the end of the buffer.
  SetLastBlock<kBlockSize>(dst, value, count);
}
// A general purpose implementation assuming cheap unaligned writes for sizes:
// 1, 2, 4, 8, 16, 32 and 64 Bytes. Note that some architecture can't store 32
// or 64 Bytes at a time, the compiler will expand them as needed.
//
// This implementation is subject to change as we benchmark more processors. We
// may also want to customize it for processors with specialized instructions
// that performs better (e.g. `rep stosb`).
//
// A note on the apparent discrepancy in the use of 32 vs 64 Bytes writes.
// We want to balance two things here:
// - The number of redundant writes (when using `SetBlockOverlap`),
// - The number of conditionals for sizes <=128 (~90% of memset calls are for
// such sizes).
//
// For the range 64-128:
// - SetBlockOverlap<64> uses no conditionals but always writes 128 Bytes;
// this is wasteful near 65 but efficient toward 128.
// - SetAlignedBlocks<32> would consume between 3 and 4 conditionals and write
// 96 or 128 Bytes.
// - Another option would be a hybrid approach: Set<64>+Overlap<32> for 65-96
// and Set<96>+Overlap<32> for 97-128.
//
// Benchmarks showed that redundant writes were cheap (for Intel X86) but
// conditionals were expensive, even on processors that do not support writing
// 64B at a time (pre-AVX512F). We also want to favor short functions that
// allow more hot code to fit in the iL1 cache.
//
// Above 128 we have to use conditionals since we don't know the upper bound in
// advance. SetAlignedBlocks<64> may waste up to 63 Bytes, SetAlignedBlocks<32>
// may waste up to 31 Bytes. Benchmarks showed that SetAlignedBlocks<64> was not
// superior for sizes that mattered.
inline static void GeneralPurposeMemset(char *dst, unsigned char value,
                                        size_t count) {
  if (count == 0) {
    // Nothing to write.
  } else if (count == 1) {
    // Sizes 1 to 4 each get a single store of exactly that size.
    SetBlock<1>(dst, value);
  } else if (count == 2) {
    SetBlock<2>(dst, value);
  } else if (count == 3) {
    SetBlock<3>(dst, value);
  } else if (count == 4) {
    SetBlock<4>(dst, value);
  } else if (count <= 8) {
    // Sizes 5 to 128: two stores of the largest power of two below `count`,
    // one at each end of the buffer.
    SetBlockOverlap<4>(dst, value, count);
  } else if (count <= 16) {
    SetBlockOverlap<8>(dst, value, count);
  } else if (count <= 32) {
    SetBlockOverlap<16>(dst, value, count);
  } else if (count <= 64) {
    SetBlockOverlap<32>(dst, value, count);
  } else if (count <= 128) {
    SetBlockOverlap<64>(dst, value, count);
  } else {
    // Above 128 Bytes: loop over 32-Byte blocks.
    SetAlignedBlocks<32>(dst, value, count);
  }
}
} // namespace __llvm_libc
#endif // LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H

View File

@ -0,0 +1,21 @@
//===-- Implementation of memset ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/string/memset.h"
#include "src/__support/common.h"
#include "src/string/memory_utils/memset_utils.h"
namespace __llvm_libc {
// Public `memset` entrypoint: fills `count` bytes at `dst` with `value`
// narrowed to `unsigned char`, then hands `dst` back to the caller.
void *LLVM_LIBC_ENTRYPOINT(memset)(void *dst, int value, size_t count) {
  char *const bytes = reinterpret_cast<char *>(dst);
  const unsigned char fill = static_cast<unsigned char>(value);
  GeneralPurposeMemset(bytes, fill, count);
  return dst;
}
} // namespace __llvm_libc

20
libc/src/string/memset.h Normal file
View File

@ -0,0 +1,20 @@
//===-- Implementation header for memset ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_MEMSET_H
#define LLVM_LIBC_SRC_STRING_MEMSET_H
#include "include/string.h"
namespace __llvm_libc {
// Sets the `count` bytes starting at `ptr` to `value` (converted to
// `unsigned char`) and returns `ptr`.
void *memset(void *ptr, int value, size_t count);
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_STRING_MEMSET_H

View File

@ -2,3 +2,13 @@ add_memcpy("memcpy_${LIBC_TARGET_MACHINE}_opt_none" REJECT "${ALL_CPU_FEATURES}"
# memcpy variants. REQUIRE / REJECT name the CPU features each variant is
# compiled with / without.
add_memcpy("memcpy_${LIBC_TARGET_MACHINE}_opt_sse" REQUIRE "SSE" REJECT "SSE2")
add_memcpy("memcpy_${LIBC_TARGET_MACHINE}_opt_avx" REQUIRE "AVX" REJECT "AVX2")
add_memcpy("memcpy_${LIBC_TARGET_MACHINE}_opt_avx512f" REQUIRE "AVX512F")
# memset variants: none / SSE / AVX / AVX512F. The REQUIRE list is also what
# the unit tests check against the host before running a variant.
add_memset("memset_${LIBC_TARGET_MACHINE}_opt_none" REJECT "${ALL_CPU_FEATURES}")
add_memset("memset_${LIBC_TARGET_MACHINE}_opt_sse" REQUIRE "SSE" REJECT "SSE2")
add_memset("memset_${LIBC_TARGET_MACHINE}_opt_avx" REQUIRE "AVX" REJECT "AVX2")
add_memset("memset_${LIBC_TARGET_MACHINE}_opt_avx512f" REQUIRE "AVX512F")
# bzero variants, using the same feature matrix as memset.
add_bzero("bzero_${LIBC_TARGET_MACHINE}_opt_none" REJECT "${ALL_CPU_FEATURES}")
add_bzero("bzero_${LIBC_TARGET_MACHINE}_opt_sse" REQUIRE "SSE" REJECT "SSE2")
add_bzero("bzero_${LIBC_TARGET_MACHINE}_opt_avx" REQUIRE "AVX" REJECT "AVX2")
add_bzero("bzero_${LIBC_TARGET_MACHINE}_opt_avx512f" REQUIRE "AVX512F")

View File

@ -32,23 +32,28 @@ add_libc_unittest(
libc.src.string.strlen
)
# Tests all implementations of memcpy that can run on the host.
get_property(memcpy_implementations GLOBAL PROPERTY memcpy_implementations)
foreach(memcpy_config_name IN LISTS memcpy_implementations)
get_target_property(require_cpu_features libc.src.string.${memcpy_config_name} REQUIRE_CPU_FEATURES)
host_supports(can_run "${require_cpu_features}")
if(can_run)
add_libc_unittest(
${memcpy_config_name}_test
SUITE
libc_string_unittests
SRCS
memcpy_test.cpp
DEPENDS
libc.src.string.${memcpy_config_name}
)
else()
message(STATUS "Skipping test for '${memcpy_config_name}' insufficient host cpu features")
endif()
endforeach()
# Adds one unit test per registered implementation of `name`, limited to the
# implementations the host is able to run.
#
# `name` selects the global property `<name>_implementations`, whose entries
# are fully qualified target names. For each entry the target's
# REQUIRE_CPU_FEATURES property is checked with `host_supports`; unsupported
# entries are reported and skipped. All remaining arguments (e.g. SRCS) are
# forwarded to `add_libc_unittest`.
function(add_libc_multi_impl_test name)
  get_property(registered_targets GLOBAL PROPERTY ${name}_implementations)
  foreach(fq_config_name IN LISTS registered_targets)
    get_target_property(required_cpu_features
      ${fq_config_name} REQUIRE_CPU_FEATURES)
    host_supports(can_run "${required_cpu_features}")
    if(NOT can_run)
      message(STATUS "Skipping test for '${fq_config_name}' insufficient host cpu features '${required_cpu_features}'")
      continue()
    endif()
    add_libc_unittest(
      ${fq_config_name}_test
      SUITE
        libc_string_unittests
      DEPENDS
        ${fq_config_name}
      ${ARGN}
    )
  endforeach()
endfunction()

add_libc_multi_impl_test(memcpy SRCS memcpy_test.cpp)
add_libc_multi_impl_test(memset SRCS memset_test.cpp)
add_libc_multi_impl_test(bzero SRCS bzero_test.cpp)

View File

@ -0,0 +1,49 @@
//===-- Unittests for bzero -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/string/bzero.h"
#include "utils/CPP/ArrayRef.h"
#include "utils/UnitTest/Test.h"
using __llvm_libc::cpp::Array;
using __llvm_libc::cpp::ArrayRef;
using Data = Array<char, 2048>;
static const ArrayRef<char> kDeadcode("DEADC0DE", 8);
// Returns a Data object filled with a repetition of `filler`.
Data getData(ArrayRef<char> filler) {
  Data pattern;
  const size_t period = filler.size();
  // Tile `filler` across the whole buffer, wrapping every `period` bytes.
  for (size_t pos = 0; pos < pattern.size(); ++pos)
    pattern[pos] = filler[pos % period];
  return pattern;
}
// Exercises bzero for every size in [0, 1024) at every start offset in
// [0, 64) and checks that only the requested window is modified.
TEST(BzeroTest, Thorough) {
  const Data dirty = getData(kDeadcode);
  for (size_t count = 0; count < 1024; ++count) {
    for (size_t align = 0; align < 64; ++align) {
      Data buffer = dirty;
      char *const dst = &buffer[align];
      __llvm_libc::bzero(dst, count);
      const size_t end = align + count;
      // Bytes ahead of the zeroed window are untouched.
      for (size_t pos = 0; pos < align; ++pos)
        ASSERT_EQ(buffer[pos], dirty[pos]);
      // Every byte inside the window has been zeroed.
      for (size_t pos = align; pos < end; ++pos)
        ASSERT_EQ(buffer[pos], char(0));
      // Bytes past the zeroed window are untouched.
      for (size_t pos = end; pos < dirty.size(); ++pos)
        ASSERT_EQ(buffer[pos], dirty[pos]);
    }
  }
}
// FIXME: Add tests with reads and writes on the boundary of a read/write
// protected page to check we're not reading nor writing prior/past the allowed
// regions.

View File

@ -1,4 +1,4 @@
//===----------------------- Unittests for memcpy -------------------------===//
//===-- Unittests for memcpy ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@ -6,13 +6,12 @@
//
//===----------------------------------------------------------------------===//
#include "src/string/memcpy.h"
#include "utils/CPP/ArrayRef.h"
#include "utils/UnitTest/Test.h"
#include "src/string/memcpy.h"
using __llvm_libc::cpp::Array;
using __llvm_libc::cpp::ArrayRef;
using __llvm_libc::cpp::MutableArrayRef;
using Data = Array<char, 2048>;
static const ArrayRef<char> kNumbers("0123456789", 10);
@ -33,8 +32,10 @@ TEST(MemcpyTest, Thorough) {
for (size_t align = 0; align < 64; ++align) {
auto buffer = dirty;
const char *const src = groundtruth.data();
char *const dst = &buffer[align];
__llvm_libc::memcpy(dst, src, count);
void *const dst = &buffer[align];
void *const ret = __llvm_libc::memcpy(dst, src, count);
// Return value is `dst`.
ASSERT_EQ(ret, dst);
// Everything before copy is untouched.
for (size_t i = 0; i < align; ++i)
ASSERT_EQ(buffer[i], dirty[i]);

View File

@ -0,0 +1,53 @@
//===-- Unittests for memset ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/string/memset.h"
#include "utils/CPP/ArrayRef.h"
#include "utils/UnitTest/Test.h"
using __llvm_libc::cpp::Array;
using __llvm_libc::cpp::ArrayRef;
using Data = Array<char, 2048>;
static const ArrayRef<char> kDeadcode("DEADC0DE", 8);
// Returns a Data object filled with a repetition of `filler`.
Data getData(ArrayRef<char> filler) {
  Data pattern;
  const size_t period = filler.size();
  // Tile `filler` across the whole buffer, wrapping every `period` bytes.
  for (size_t pos = 0; pos < pattern.size(); ++pos)
    pattern[pos] = filler[pos % period];
  return pattern;
}
// Exercises memset with fill values -1, 0 and 1, for every size in [0, 1024)
// at every start offset in [0, 64). Checks the return value and that only the
// requested window is modified.
TEST(MemsetTest, Thorough) {
  const Data dirty = getData(kDeadcode);
  for (int value = -1; value <= 1; ++value) {
    for (size_t count = 0; count < 1024; ++count) {
      for (size_t align = 0; align < 64; ++align) {
        Data buffer = dirty;
        void *const dst = &buffer[align];
        void *const ret = __llvm_libc::memset(dst, value, count);
        // memset hands back the destination pointer.
        ASSERT_EQ(ret, dst);
        const size_t end = align + count;
        // Bytes ahead of the filled window are untouched.
        for (size_t pos = 0; pos < align; ++pos)
          ASSERT_EQ(buffer[pos], dirty[pos]);
        // Every byte inside the window holds the fill value.
        for (size_t pos = align; pos < end; ++pos)
          ASSERT_EQ(buffer[pos], static_cast<char>(value));
        // Bytes past the filled window are untouched.
        for (size_t pos = end; pos < dirty.size(); ++pos)
          ASSERT_EQ(buffer[pos], dirty[pos]);
      }
    }
  }
}
// FIXME: Add tests with reads and writes on the boundary of a read/write
// protected page to check we're not reading nor writing prior/past the allowed
// regions.