diff --git a/lds/kendryte.ld b/lds/kendryte.ld index 94ae150..7e1100e 100644 --- a/lds/kendryte.ld +++ b/lds/kendryte.ld @@ -91,6 +91,16 @@ SECTIONS . = ALIGN(8); + /* Exception handling */ + .eh_frame : + { + KEEP (*(.eh_frame)) *(.eh_frame.*) + . = ALIGN(8); + } >ram AT>ram :ram_ro + .gnu_extab : { *(.gnu_extab) } >ram AT>ram :ram_ro + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } >ram AT>ram :ram_ro + .exception_ranges : { *(.exception_ranges .exception_ranges*) } >ram AT>ram :ram_ro + /* Init array and fini array */ .preinit_array : { diff --git a/lib/nncase/include/datatypes.h b/lib/nncase/include/datatypes.h index d4212d3..427a4a4 100644 --- a/lib/nncase/include/datatypes.h +++ b/lib/nncase/include/datatypes.h @@ -1,5 +1,22 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once +#include "target_config.h" #include +#include +#include #include #include @@ -26,13 +43,19 @@ struct value_range { T min; T max; + + static constexpr value_range full() noexcept + { + return { std::numeric_limits::lowest(), std::numeric_limits::max() }; + } }; typedef enum _reduce_op { reduce_mean, reduce_min, - reduce_max + reduce_max, + reduce_sum } reduce_op_t; typedef enum _binary_op @@ -40,9 +63,31 @@ typedef enum _binary_op binary_add, binary_sub, binary_mul, - binary_div + binary_div, + binary_min, + binary_max } binary_op_t; +typedef enum _unary_op +{ + unary_abs, + unary_ceil, + unary_cos, + unary_exp, + unary_floor, + unary_log, + unary_neg, + unary_rsqrt, + unary_sin, + unary_square +} unary_op_t; + +typedef enum _image_resize_mode +{ + image_resize_bilinear, + image_resize_nearest_neighbor +} image_resize_mode_t; + typedef struct _quant_param { int32_t zero_point; @@ -54,10 +99,17 @@ inline bool operator==(const quant_param_t &lhs, const quant_param_t &rhs) noexc return lhs.zero_point == rhs.zero_point && lhs.scale == rhs.scale; } +inline bool almost_equal(const quant_param_t &lhs, const quant_param_t &rhs) noexcept +{ + return lhs.zero_point == rhs.zero_point && std::abs(lhs.scale - rhs.scale) <= std::numeric_limits::epsilon(); +} + struct fixed_mul { float mul; int8_t shift; + + int32_t rounded_mul() const noexcept { return (int32_t)roundf(mul); } }; typedef enum _memory_type @@ -94,4 +146,14 @@ struct memory_range uint32_t start; uint32_t size; }; + +inline bool operator==(const padding &lhs, const padding &rhs) noexcept +{ + return lhs.before == rhs.before && lhs.after == rhs.after; +} + +inline bool operator!=(const padding &lhs, const padding &rhs) noexcept +{ + return lhs.before != rhs.before || lhs.after != rhs.after; +} } diff --git a/lib/nncase/include/io_utils.h b/lib/nncase/include/io_utils.h new file mode 100644 index 0000000..d1188e2 --- /dev/null +++ b/lib/nncase/include/io_utils.h @@ -0,0 +1,36 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include +#include + +namespace nncase +{ +inline std::vector read_file(const std::filesystem::path &filename) +{ + std::ifstream infile(filename, std::ios::binary | std::ios::in); + if (infile.bad()) + throw std::runtime_error("Cannot open file: " + filename.string()); + + infile.seekg(0, std::ios::end); + size_t length = infile.tellg(); + infile.seekg(0, std::ios::beg); + std::vector data(length); + infile.read(reinterpret_cast(data.data()), length); + infile.close(); + return data; +} +} diff --git a/lib/nncase/include/kernels/cpu/cpu_kernels.h b/lib/nncase/include/kernels/cpu/cpu_kernels.h index c151890..7123959 100644 --- a/lib/nncase/include/kernels/cpu/cpu_kernels.h +++ b/lib/nncase/include/kernels/cpu/cpu_kernels.h @@ -1,6 +1,20 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include "../utils.h" -#include +#include namespace nncase { diff --git a/lib/nncase/include/kernels/k210/k210_kernels.h b/lib/nncase/include/kernels/k210/k210_kernels.h index 2782b6f..78345c8 100644 --- a/lib/nncase/include/kernels/k210/k210_kernels.h +++ b/lib/nncase/include/kernels/k210/k210_kernels.h @@ -1,7 +1,21 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include "../utils.h" -#include -#include +#include +#include namespace nncase { @@ -9,16 +23,39 @@ namespace kernels { namespace k210 { + namespace details + { + template + struct pool_partial_type; + + template <> + struct pool_partial_type + { + using type = uint32_t; + }; + + template <> + struct pool_partial_type + { + using type = float; + }; + + template + using pool_partial_type_t = typename pool_partial_type::type; + } + inline void kpu_upload(const uint8_t *src, uint8_t *dest, const runtime_shape_t &in_shape) { + using namespace runtime::k210; + if (in_shape[3] % 64 == 0) { std::copy(src, src + kernels::details::compute_size(in_shape), dest); } else { - auto layout = targets::k210::get_kpu_row_layout(in_shape[3]); - auto fmap_size = targets::k210::get_kpu_bytes(in_shape[3], in_shape[2], in_shape[1]); + auto layout = get_kpu_row_layout(in_shape[3]); + auto fmap_size = get_kpu_bytes(in_shape[3], in_shape[2], in_shape[1]); for (int32_t batch = 0; batch < in_shape[0]; batch++) { @@ -37,18 +74,18 @@ namespace kernels } } -#if NNCASE_TARGET_K210_SIMULATOR - inline void kpu_download(const uint8_t *src, uint8_t *dest, const runtime_shape_t &in_shape) { + using namespace runtime::k210; + if (in_shape[3] % 64 == 0) { std::copy(src, src + kernels::details::compute_size(in_shape), dest); } else { - auto layout = targets::k210::get_kpu_row_layout(in_shape[3]); - auto fmap_size = targets::k210::get_kpu_bytes(in_shape[3], in_shape[2], in_shape[1]); + auto layout = get_kpu_row_layout(in_shape[3]); + auto fmap_size = get_kpu_bytes(in_shape[3], in_shape[2], in_shape[1]); for (int32_t batch = 0; batch < in_shape[0]; batch++) { @@ -69,7 +106,7 @@ namespace kernels template void kpu_conv2d(const uint8_t *input, int64_t *workspace, uint8_t *output, const uint8_t *weights, int32_t in_h, int32_t in_w, int32_t in_channels, int32_t out_channels, uint8_t pad_value, int32_t arg_x, - int32_t shift_x, int32_t arg_w, int32_t shift_w, int64_t arg_add, const targets::k210::kpu_batchnorm_segment *batchnorm, const targets::k210::kpu_activation_table_t &activation) + int32_t shift_x, int32_t arg_w, int32_t shift_w, int64_t arg_add, const runtime::k210::kpu_batchnorm_segment *batchnorm, const runtime::k210::kpu_activation_table_t &activation) { const auto channel_size = size_t(in_h) * in_w; // conv @@ -142,19 +179,21 @@ namespace kernels for (size_t i = 0; i < channel_size; i++) { auto value = (*src_it++ * bn.mul >> bn.shift) + bn.add; - auto &seg = *std::find_if(activation.rbegin(), activation.rend(), [value](const targets::k210::kpu_activation_segment &seg) { + auto &seg = *std::find_if(activation.rbegin(), activation.rend(), [value](const runtime::k210::kpu_activation_segment &seg) { return value > seg.start_x; }); - value = runtime::carry_shift((value - seg.start_x) * seg.mul, seg.shift); + value = runtime::carry_shift((value - seg.start_x) * seg.mul, seg.shift); *out_it++ = (uint8_t)std::clamp(value, int64_t(0), int64_t(255)); } } } } - inline void kpu_pool2d(const uint8_t *input, uint8_t *output, int32_t in_h, int32_t in_w, int32_t in_channels, targets::k210::kpu_pool_type_t pool_type) + template + inline void kpu_pool2d(const T *input, T *output, int32_t in_h, int32_t in_w, int32_t in_channels, runtime::k210::kpu_pool_type_t pool_type) { - using namespace targets::k210; + using namespace runtime::k210; + using partial_t = details::pool_partial_type_t; const auto filter = get_kpu_filter_size(pool_type); const auto stride = get_kpu_filter_stride(pool_type); @@ -171,7 +210,7 @@ namespace kernels { const int32_t in_y_origin = oy * stride; const int32_t in_x_origin = ox * stride; - int32_t value = 0; + partial_t value = 0; switch (pool_type) { @@ -187,16 +226,17 @@ namespace kernels case kpu_pool_max_2_s1: case kpu_pool_max_4_s4: { + value = std::numeric_limits::lowest(); for (int32_t ky = 0; ky < filter; ky++) { for (int32_t kx = 0; kx < filter; kx++) { const int32_t in_y = in_y_origin + ky; const int32_t in_x = in_x_origin + kx; - int32_t in_v; + partial_t in_v; if (in_y < 0 || in_y >= in_h || in_x < 0 || in_x >= in_w) - in_v = 0; + in_v = std::numeric_limits::lowest(); else in_v = in_c_p[in_y * in_w + in_x]; @@ -216,7 +256,7 @@ namespace kernels { const int32_t in_y = std::clamp(in_y_origin + ky, 0, in_h - 1); const int32_t in_x = std::clamp(in_x_origin + kx, 0, in_w - 1); - const int32_t in_v = in_c_p[in_y * in_w + in_x]; + const T in_v = in_c_p[in_y * in_w + in_x]; value += in_v; } @@ -232,7 +272,7 @@ namespace kernels auto k_off = get_kpu_select_pool_offset(pool_type); const int32_t in_y = in_y_origin + k_off[0]; const int32_t in_x = in_x_origin + k_off[1]; - int32_t in_v; + partial_t in_v; if (in_y < 0 || in_y >= in_h || in_x < 0 || in_x >= in_w) in_v = 0; @@ -244,13 +284,68 @@ namespace kernels } } - *output++ = (uint8_t)value; + *output++ = (T)value; } } } } -#endif + template + void fake_kpu_conv2d(const float *input, float *output, const float *weights, const float *bias, int32_t in_h, int32_t in_w, int32_t in_channels, int32_t out_channels, const value_range &fused_activation) + { + const auto channel_size = size_t(in_h) * in_w; + + const auto pad = FilterSize == 1 ? 0 : 1; + const auto groups = IsDepthwise ? out_channels : 1; + const auto g_ic = IsDepthwise ? 1 : in_channels / groups; + const auto g_oc = IsDepthwise ? 1 : out_channels; + + for (int32_t og = 0; og < groups; og++) + { + const auto *w_group_p = weights + (size_t)og * g_oc * g_ic * FilterSize * FilterSize; + + for (int32_t oc = 0; oc < g_oc; oc++) + { + const auto *w_oc_p = w_group_p + (size_t)oc * g_ic * FilterSize * FilterSize; + + for (int32_t oy = 0; oy < in_h; oy++) + { + for (int32_t ox = 0; ox < in_w; ox++) + { + const int32_t in_y_origin = oy - pad; + const int32_t in_x_origin = ox - pad; + const int32_t filter_y_start = std::max(0, -in_y_origin); + const int32_t filter_y_end = std::min(FilterSize, in_h - in_y_origin); + const int32_t filter_x_start = std::max(0, -in_x_origin); + const int32_t filter_x_end = std::min(FilterSize, in_w - in_x_origin); + float value = bias[og * g_oc + oc]; + + for (int32_t ic = 0; ic < g_ic; ic++) + { + const auto *in_c_p = input + ((size_t)og * g_ic + ic) * in_h * in_w; + const auto *w_ic_p = w_oc_p + (size_t)ic * FilterSize * FilterSize; + + for (int32_t ky = filter_y_start; ky < filter_y_end; ky++) + { + for (int32_t kx = filter_x_start; kx < filter_x_end; kx++) + { + const int32_t in_y = in_y_origin + ky; + const int32_t in_x = in_x_origin + kx; + + const auto in_v = in_c_p[in_y * in_w + in_x]; + const auto w = w_ic_p[ky * FilterSize + kx]; + + value += in_v * w; + } + } + } + + *output++ = kernels::details::apply_activation(value, fused_activation); + } + } + } + } + } } } } diff --git a/lib/nncase/include/kernels/neutral/neutral_kernels.h b/lib/nncase/include/kernels/neutral/neutral_kernels.h index c4d372e..3c8d359 100644 --- a/lib/nncase/include/kernels/neutral/neutral_kernels.h +++ b/lib/nncase/include/kernels/neutral/neutral_kernels.h @@ -1,7 +1,21 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include "../utils.h" #include -#include +#include #include namespace nncase @@ -82,7 +96,7 @@ namespace kernels const int32_t filter_y_end = std::min(filter_h, (in_shape[2] - in_y_origin + dilation_h - 1) / dilation_h); const int32_t filter_x_start = std::max(0, (-in_x_origin + dilation_w - 1) / dilation_w); const int32_t filter_x_end = std::min(filter_w, (in_shape[3] - in_x_origin + dilation_w - 1) / dilation_w); - float value = bias[oc]; + float value = bias[og * g_oc + oc]; for (int32_t ic = 0; ic < g_ic; ic++) { @@ -172,7 +186,7 @@ namespace kernels if (d0 < paddings[0].before || d0 >= out_shape[0] - paddings[0].after || d1 < paddings[1].before || d1 >= out_shape[1] - paddings[1].after || d2 < paddings[2].before || d2 >= out_shape[2] - paddings[2].after - || d3 < paddings[3].before || d1 >= out_shape[3] - paddings[3].after) + || d3 < paddings[3].before || d3 >= out_shape[3] - paddings[3].after) *output++ = pad_value; else *output++ = in2[d3_origin + d3]; diff --git a/lib/nncase/include/kernels/utils.h b/lib/nncase/include/kernels/utils.h index 62d717f..a5cdbd0 100644 --- a/lib/nncase/include/kernels/utils.h +++ b/lib/nncase/include/kernels/utils.h @@ -1,3 +1,17 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include #include diff --git a/lib/nncase/include/nncase.h b/lib/nncase/include/nncase.h index 28666e1..f15aeb2 100644 --- a/lib/nncase/include/nncase.h +++ b/lib/nncase/include/nncase.h @@ -1,4 +1,4 @@ -/* Copyright 2018 Canaan Inc. +/* Copyright 2019 Canaan Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/lib/nncase/include/runtime/binary_writer.h b/lib/nncase/include/runtime/binary_writer.h index 1af2d8f..ef49862 100644 --- a/lib/nncase/include/runtime/binary_writer.h +++ b/lib/nncase/include/runtime/binary_writer.h @@ -1,3 +1,17 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include #include @@ -36,12 +50,18 @@ namespace runtime stream_.seekp(pos); } - void align_position(size_t alignment) + std::streamoff align_position(size_t alignment) { auto pos = position(); auto rem = pos % alignment; if (rem != 0) - position(pos + std::streamoff(alignment - rem)); + { + auto off = std::streamoff(alignment - rem); + position(pos + off); + return off; + } + + return 0; } private: diff --git a/lib/nncase/include/targets/cpu/cpu_ops_body.h b/lib/nncase/include/runtime/cpu/cpu_ops_body.h similarity index 85% rename from lib/nncase/include/targets/cpu/cpu_ops_body.h rename to lib/nncase/include/runtime/cpu/cpu_ops_body.h index afdb8e1..eb9a686 100644 --- a/lib/nncase/include/targets/cpu/cpu_ops_body.h +++ b/lib/nncase/include/runtime/cpu/cpu_ops_body.h @@ -1,9 +1,23 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include "../node_body.h" namespace nncase { -namespace targets +namespace runtime { namespace cpu { @@ -25,7 +39,7 @@ namespace targets xtl::span weights; xtl::span bias; - void deserialize(runtime::span_reader &reader) + void deserialize(span_reader &reader) { reader.read(input); reader.read(output); @@ -62,7 +76,7 @@ namespace targets xtl::span weights; xtl::span bias; - void deserialize(runtime::span_reader &reader) + void deserialize(span_reader &reader) { reader.read(input); reader.read(output); @@ -121,7 +135,7 @@ namespace targets xtl::span weights; xtl::span bias; - void deserialize(runtime::span_reader &reader) + void deserialize(span_reader &reader) { reader.read(input); reader.read(output); @@ -166,7 +180,7 @@ namespace targets xtl::span weights; xtl::span bias; - void deserialize(runtime::span_reader &reader) + void deserialize(span_reader &reader) { reader.read(input); reader.read(output); diff --git a/lib/nncase/include/runtime/cpu/interpreter.h b/lib/nncase/include/runtime/cpu/interpreter.h new file mode 100644 index 0000000..fae77e4 --- /dev/null +++ b/lib/nncase/include/runtime/cpu/interpreter.h @@ -0,0 +1,31 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include + +namespace nncase +{ +namespace runtime +{ + namespace cpu + { + class interpreter : public runtime::interpreter_base + { + public: + using interpreter_base::interpreter_base; + }; + } +} +} diff --git a/lib/nncase/include/runtime/interpreter.h b/lib/nncase/include/runtime/interpreter.h index 20e827b..6026883 100644 --- a/lib/nncase/include/runtime/interpreter.h +++ b/lib/nncase/include/runtime/interpreter.h @@ -1,3 +1,17 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include "model.h" #include diff --git a/lib/nncase/include/targets/k210/interpreter.h b/lib/nncase/include/runtime/k210/interpreter.h similarity index 53% rename from lib/nncase/include/targets/k210/interpreter.h rename to lib/nncase/include/runtime/k210/interpreter.h index 3dc1d0e..9bc4c5f 100644 --- a/lib/nncase/include/targets/k210/interpreter.h +++ b/lib/nncase/include/runtime/k210/interpreter.h @@ -1,20 +1,34 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include "k210_sim_types.h" #include namespace nncase { -namespace targets +namespace runtime { namespace k210 { struct k210_interpreter_context { - runtime::interpreter_base *interpreter; - runtime::interpreter_step_t step; + interpreter_base *interpreter; + interpreter_step_t step; }; - class interpreter : public runtime::interpreter_base + class interpreter : public interpreter_base { public: using interpreter_base::memory_at; diff --git a/lib/nncase/include/targets/k210/k210_ops_body.h b/lib/nncase/include/runtime/k210/k210_ops_body.h similarity index 54% rename from lib/nncase/include/targets/k210/k210_ops_body.h rename to lib/nncase/include/runtime/k210/k210_ops_body.h index d32b2b5..3c6be5e 100644 --- a/lib/nncase/include/targets/k210/k210_ops_body.h +++ b/lib/nncase/include/runtime/k210/k210_ops_body.h @@ -1,3 +1,17 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include "../node_body.h" #include "k210_runtime_op_utility.h" @@ -5,7 +19,7 @@ namespace nncase { -namespace targets +namespace runtime { namespace k210 { @@ -26,7 +40,7 @@ namespace targets const kpu_activate_table_t *activation; xtl::span weights; - void deserialize(runtime::span_reader &reader) + void deserialize(span_reader &reader) { reader.read(main_mem_output); reader.read(batches); @@ -52,6 +66,27 @@ namespace targets layer.kernel_load_cfg.data.para_start_addr = (uintptr_t)weights.data(); #endif } + + void serialize(binary_writer &writer) + { + writer.write(main_mem_output); + writer.write(batches); + writer.write(reserved0); + + auto layer_pos = writer.position(); + writer.position(layer_pos + std::streamoff(sizeof(layer))); + layer.kernel_pool_type_cfg.data.bwsx_base_addr = (uint32_t)writer.align_position(8); + writer.write_array(batch_norm); + layer.kernel_calc_type_cfg.data.active_addr = (uint32_t)writer.align_position(256); + writer.write(*activation); + layer.kernel_load_cfg.data.para_start_addr = (uint32_t)writer.align_position(128); + writer.write_array(weights); + + auto end_pos = writer.position(); + writer.position(layer_pos); + writer.write(layer); + writer.position(end_pos); + } }; } } diff --git a/lib/nncase/include/targets/k210/k210_runtime_op_utility.h b/lib/nncase/include/runtime/k210/k210_runtime_op_utility.h similarity index 65% rename from lib/nncase/include/targets/k210/k210_runtime_op_utility.h rename to lib/nncase/include/runtime/k210/k210_runtime_op_utility.h index 6b5df2e..2596f11 100644 --- a/lib/nncase/include/targets/k210/k210_runtime_op_utility.h +++ b/lib/nncase/include/runtime/k210/k210_runtime_op_utility.h @@ -1,9 +1,23 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include "k210_sim_types.h" namespace nncase { -namespace targets +namespace runtime { namespace k210 { @@ -49,11 +63,24 @@ namespace targets case kpu_filter_3x3: return 3; default: - return 0; + NNCASE_THROW(std::runtime_error, "Invalid kpu filter"); } } - inline int get_kpu_rows(int32_t width, int32_t height, int32_t channels) + inline int32_t get_kpu_padding(kpu_filter_type_t filter) + { + switch (filter) + { + case kpu_filter_1x1: + return 0; + case kpu_filter_3x3: + return 1; + default: + NNCASE_THROW(std::runtime_error, "Invalid kpu filter"); + } + } + + inline int32_t get_kpu_rows(int32_t width, int32_t height, int32_t channels) { auto layout = get_kpu_row_layout(width); auto one_line_channels = std::min(channels, layout.groups); @@ -62,12 +89,15 @@ namespace targets return size; } - inline int get_kpu_bytes(int32_t width, int32_t height, int32_t channels) + inline int32_t get_kpu_bytes(int32_t width, int32_t height, int32_t channels) { return get_kpu_rows(width, height, channels) * 64; } -#if NNCASE_TARGET_K210_SIMULATOR + inline int32_t get_kpu_bytes(const runtime_shape_t &shape) + { + return get_kpu_bytes(shape[3], shape[2], shape[1]) * shape[0]; + } inline int32_t get_kpu_filter_size(kpu_pool_type_t filter) { @@ -86,6 +116,8 @@ namespace targets case kpu_pool_mean_4_s4: case kpu_pool_left_top_4_s4: return 4; + default: + NNCASE_THROW(std::runtime_error, "Invalid kpu filter"); } } @@ -107,6 +139,8 @@ namespace targets case kpu_pool_mean_4_s4: case kpu_pool_left_top_4_s4: return 4; + default: + NNCASE_THROW(std::runtime_error, "Invalid kpu pool type"); } } @@ -125,10 +159,10 @@ namespace targets return { 0, 1 }; case kpu_pool_left_top_4_s4: return { 0, 0 }; + default: + NNCASE_THROW(std::runtime_error, "Invalid kpu pool type"); } } - -#endif } } } diff --git a/lib/nncase/include/targets/k210/k210_sim_types.h b/lib/nncase/include/runtime/k210/k210_sim_types.h similarity index 88% rename from lib/nncase/include/targets/k210/k210_sim_types.h rename to lib/nncase/include/runtime/k210/k210_sim_types.h index 17398ec..09c0f8d 100644 --- a/lib/nncase/include/targets/k210/k210_sim_types.h +++ b/lib/nncase/include/runtime/k210/k210_sim_types.h @@ -1,3 +1,17 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include #include @@ -11,7 +25,7 @@ namespace nncase { -namespace targets +namespace runtime { namespace k210 { diff --git a/lib/nncase/include/runtime/kernel_registry.h b/lib/nncase/include/runtime/kernel_registry.h index 0dc1ddd..8803b67 100644 --- a/lib/nncase/include/runtime/kernel_registry.h +++ b/lib/nncase/include/runtime/kernel_registry.h @@ -1,5 +1,19 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once -#include "target_config.h" +#include "target_interpreter.h" #include #include #include diff --git a/lib/nncase/include/runtime/model.h b/lib/nncase/include/runtime/model.h index b597618..48e8509 100644 --- a/lib/nncase/include/runtime/model.h +++ b/lib/nncase/include/runtime/model.h @@ -1,3 +1,17 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include "../datatypes.h" #include "runtime_op.h" @@ -9,7 +23,7 @@ namespace runtime enum model_target : uint32_t { MODEL_TARGET_CPU = 0, - MODEL_TARGET_K210 = 1, + MODEL_TARGET_K210 = 1 }; struct model_header diff --git a/lib/nncase/include/targets/neutral/neutral_ops_body.h b/lib/nncase/include/runtime/neutral/neutral_ops_body.h similarity index 83% rename from lib/nncase/include/targets/neutral/neutral_ops_body.h rename to lib/nncase/include/runtime/neutral/neutral_ops_body.h index d4ea798..584f98e 100644 --- a/lib/nncase/include/targets/neutral/neutral_ops_body.h +++ b/lib/nncase/include/runtime/neutral/neutral_ops_body.h @@ -1,9 +1,23 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include "../node_body.h" namespace nncase { -namespace targets +namespace runtime { namespace neutral { @@ -28,7 +42,7 @@ namespace targets xtl::span inputs; xtl::span dims; - void deserialize(runtime::span_reader &reader) + void deserialize(span_reader &reader) { reader.read(output); reader.read(inner_size); @@ -38,7 +52,7 @@ namespace targets reader.read_span(dims, inputs_count); } - void serialize(runtime::binary_writer &writer) const + void serialize(binary_writer &writer) const { writer.write(output); writer.write(inner_size); @@ -68,7 +82,7 @@ namespace targets xtl::span weights; xtl::span bias; - void deserialize(runtime::span_reader &reader) + void deserialize(span_reader &reader) { reader.read(input); reader.read(output); @@ -88,7 +102,7 @@ namespace targets reader.read_span(bias, out_channels); } - void serialize(runtime::binary_writer &writer) const + void serialize(binary_writer &writer) const { writer.write(input); writer.write(output); @@ -127,7 +141,7 @@ namespace targets value_range fused_activation; xtl::span bias; - void deserialize(runtime::span_reader &reader) + void deserialize(span_reader &reader) { reader.read(input_a); reader.read(input_b); @@ -139,7 +153,7 @@ namespace targets reader.read_span(bias, b_cols); } - void serialize(runtime::binary_writer &writer) const + void serialize(binary_writer &writer) const { writer.write(input_a); writer.write(input_b); @@ -202,23 +216,14 @@ namespace targets value_range fused_activation; }; - struct resize_bilinear_options : public simple_node_body - { - memory_range input; - memory_range output; - runtime_shape_t in_shape; - int32_t out_h; - int32_t out_w; - bool align_corners; - }; - - struct resize_nearest_neighbor_options : public simple_node_body + struct resize_image_options : public simple_node_body { memory_range input; memory_range output; runtime_shape_t in_shape; int32_t out_h; int32_t out_w; + image_resize_mode_t mode; bool align_corners; }; @@ -253,6 +258,13 @@ namespace targets int32_t new_axis_mask; int32_t shrink_axis_mask; }; + + struct unary_options : public simple_node_body + { + memory_range input; + memory_range output; + unary_op_t unary_op; + }; } } } diff --git a/lib/nncase/include/runtime/node_body.h b/lib/nncase/include/runtime/node_body.h new file mode 100644 index 0000000..8bfeb47 --- /dev/null +++ b/lib/nncase/include/runtime/node_body.h @@ -0,0 +1,38 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include "../datatypes.h" +#include "binary_writer.h" +#include "span_reader.h" + +namespace nncase +{ +namespace runtime +{ + template + struct simple_node_body + { + void deserialize(span_reader &reader) + { + reader.read(static_cast(*this)); + } + + void serialize(binary_writer &writer) const + { + writer.write(static_cast(*this)); + } + }; +} +} diff --git a/lib/nncase/include/runtime/runtime_op.def b/lib/nncase/include/runtime/runtime_op.def index f18dd35..c1a427b 100644 --- a/lib/nncase/include/runtime/runtime_op.def +++ b/lib/nncase/include/runtime/runtime_op.def @@ -1,32 +1,32 @@ BEGINE_DEFINE_TARGET(neutral) - DEFINE_RUNTIME_OP(neutral, binary, Binary, 0) - DEFINE_RUNTIME_OP(neutral, concat, Concat, 1) - DEFINE_RUNTIME_OP(neutral, conv2d, Conv2D, 2) - DEFINE_RUNTIME_OP(neutral, dequantize, Dequantize, 3) - DEFINE_RUNTIME_OP(neutral, matmul, MatMul, 4) - DEFINE_RUNTIME_OP(neutral, pad, Pad, 5) - DEFINE_RUNTIME_OP(neutral, quantize, Quantize, 6) - DEFINE_RUNTIME_OP(neutral, reduce, Reduce, 7) - DEFINE_RUNTIME_OP(neutral, reduce_window2d, ReduceWindow2D, 8) - DEFINE_RUNTIME_OP(neutral, memory_copy, MemoryCopy, 9) - DEFINE_RUNTIME_OP(neutral, resize_bilinear, ResizeBilinear, 10) - DEFINE_RUNTIME_OP(neutral, resize_nearest_neighbor, ResizeNearestNeighbor, 11) - DEFINE_RUNTIME_OP(neutral, softmax, Softmax, 12) - DEFINE_RUNTIME_OP(neutral, transpose, Transpose, 13) - DEFINE_RUNTIME_OP(neutral, strided_slice, StridedSlice, 14) + DEFINE_NEUTRAL_RUNTIME_OP(binary, Binary, 0x0) + DEFINE_NEUTRAL_RUNTIME_OP(concat, Concat, 0x1) + DEFINE_NEUTRAL_RUNTIME_OP(conv2d, Conv2D, 0x2) + DEFINE_NEUTRAL_RUNTIME_OP(dequantize, Dequantize, 0x3) + DEFINE_NEUTRAL_RUNTIME_OP(matmul, MatMul, 0x4) + DEFINE_NEUTRAL_RUNTIME_OP(pad, Pad, 0x5) + DEFINE_NEUTRAL_RUNTIME_OP(quantize, Quantize, 0x6) + DEFINE_NEUTRAL_RUNTIME_OP(reduce, Reduce, 0x7) + DEFINE_NEUTRAL_RUNTIME_OP(reduce_window2d, ReduceWindow2D, 0x8) + DEFINE_NEUTRAL_RUNTIME_OP(memory_copy, MemoryCopy, 0x9) + DEFINE_NEUTRAL_RUNTIME_OP(resize_image, ResizeImage, 0x0A) + DEFINE_NEUTRAL_RUNTIME_OP(softmax, Softmax, 0x0B) + DEFINE_NEUTRAL_RUNTIME_OP(transpose, Transpose, 0x0C) + DEFINE_NEUTRAL_RUNTIME_OP(strided_slice, StridedSlice, 0x0D) + DEFINE_NEUTRAL_RUNTIME_OP(unary, Unary, 0x0E) END_DEFINE_TARGET() // CPU BEGINE_DEFINE_TARGET(cpu) - DEFINE_RUNTIME_OP(cpu, cpu_conv2d, CPU_CPUConv2D, 1001) - DEFINE_RUNTIME_OP(cpu, cpu_depthwise_conv2d, CPU_CPUDepthwiseConv2D, 1002) - DEFINE_RUNTIME_OP(cpu, cpu_reduce_window2d, CPU_CPUReduceWindow2D, 1003) - DEFINE_RUNTIME_OP(cpu, cpu_quantized_conv2d, CPU_CPUQuantizedConv2D, 1004) - DEFINE_RUNTIME_OP(cpu, cpu_quantized_depthwise_conv2d, CPU_CPUQuantizedDepthwiseConv2D, 1005) + DEFINE_RUNTIME_OP(cpu, cpu_conv2d, CPUConv2D, 0x1001) + DEFINE_RUNTIME_OP(cpu, cpu_depthwise_conv2d, CPUDepthwiseConv2D, 0x1002) + DEFINE_RUNTIME_OP(cpu, cpu_reduce_window2d, CPUReduceWindow2D, 0x1003) + DEFINE_RUNTIME_OP(cpu, cpu_quantized_conv2d, CPUQuantizedConv2D, 0x1004) + DEFINE_RUNTIME_OP(cpu, cpu_quantized_depthwise_conv2d, CPUQuantizedDepthwiseConv2D, 0x1005) END_DEFINE_TARGET() // K210 BEGINE_DEFINE_TARGET(k210) - DEFINE_RUNTIME_OP(k210, kpu_upload, K210_KPUUpload, 2001) - DEFINE_RUNTIME_OP(k210, kpu_conv2d, K210_KPUConv2D, 2002) + DEFINE_RUNTIME_OP(k210, kpu_upload, KPUUpload, 0x2001) + DEFINE_RUNTIME_OP(k210, kpu_conv2d, KPUConv2D, 0x2002) END_DEFINE_TARGET() diff --git a/lib/nncase/include/runtime/runtime_op.h b/lib/nncase/include/runtime/runtime_op.h index d927ba9..3cd956d 100644 --- a/lib/nncase/include/runtime/runtime_op.h +++ b/lib/nncase/include/runtime/runtime_op.h @@ -1,3 +1,17 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include "../datatypes.h" #include @@ -7,7 +21,8 @@ namespace nncase namespace runtime { #define BEGINE_DEFINE_TARGET(...) -#define DEFINE_RUNTIME_OP(target, id, name, value) rop_##id = value, +#define DEFINE_NEUTRAL_RUNTIME_OP(id, name, value) rop_##id = value, +#define DEFINE_RUNTIME_OP(target, id, name, value) rop_##target##_##id = value, #define END_DEFINE_TARGET() enum runtime_opcode : uint32_t @@ -15,10 +30,14 @@ namespace runtime #include "runtime_op.def" }; +#undef DEFINE_NEUTRAL_RUNTIME_OP #undef DEFINE_RUNTIME_OP -#define DEFINE_RUNTIME_OP(target, id, name, value) \ +#define DEFINE_NEUTRAL_RUNTIME_OP(id, name, value) \ case rop_##id: \ return #name; +#define DEFINE_RUNTIME_OP(target, id, name, value) \ + case rop_##target##_##id: \ + return #name; constexpr std::string_view node_opcode_names(runtime_opcode opcode) { @@ -31,6 +50,7 @@ namespace runtime } #undef BEGINE_DEFINE_TARGET +#undef DEFINE_NEUTRAL_RUNTIME_OP #undef DEFINE_RUNTIME_OP #undef END_DEFINE_TARGET } diff --git a/lib/nncase/include/runtime/runtime_op_utility.h b/lib/nncase/include/runtime/runtime_op_utility.h new file mode 100644 index 0000000..c153c9f --- /dev/null +++ b/lib/nncase/include/runtime/runtime_op_utility.h @@ -0,0 +1,146 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include "../datatypes.h" + +namespace nncase +{ +namespace runtime +{ + inline size_t get_bytes(datatype_t type) + { + size_t element_size; + + switch (type) + { + case dt_float32: + element_size = 4; + break; + case dt_uint8: + element_size = 1; + break; + default: + NNCASE_THROW(std::runtime_error, "Not supported data type"); + } + + return element_size; + } + + template + uint8_t count_leading_zeros(T value) + { + uint8_t num_zeroes = 0; + for (int32_t i = Bits - 1; i >= 0; i--) + { + if ((value & (1ULL << i)) == 0) + ++num_zeroes; + else + break; + } + + return num_zeroes; + } + + template + inline T bit_mask(uint8_t shift) + { + return (T(1) << shift) - 1; + } + + template + T carry_shift(T value, uint8_t shift) + { + if (shift > 0) + { + if constexpr (Banker) + { + T result; + // Sign | Int (T - shift - 1 bits) | Frac (shift bits) + // S IIII FFF + auto integral = value >> shift; + auto fractional = value & bit_mask(shift); + auto sign = value < 0 ? -1 : 1; + auto half = 1 << (shift - 1); + + // frac < 0.5 + if (fractional < half) + { + return integral; + } + // frac > 0.5 + else if (fractional > half) + { + return integral + sign; + } + // frac == 0.5 + else + { + // odd + if (integral & 1) + return integral + sign; + // even + else + return integral; + } + + return result; + } + else + { + value >>= shift - 1; + if (value & 0x1) + { + if (value < 0) + value = (value >> 1) - 1; + else + value = (value >> 1) + 1; + } + else + { + value >>= 1; + } + } + } + + return value; + } + + template + inline int32_t mul_and_carry_shift(int32_t value, int32_t mul, uint8_t shift) + { + return (int32_t)carry_shift((int64_t)value * mul, shift); + } + + template + struct to_datatype + { + }; + + template <> + struct to_datatype + { + static constexpr datatype_t type = dt_float32; + }; + + template <> + struct to_datatype + { + static constexpr datatype_t type = dt_uint8; + }; + + template + inline constexpr datatype_t to_datatype_v = to_datatype::type; +} +} diff --git a/lib/nncase/include/runtime/span_reader.h b/lib/nncase/include/runtime/span_reader.h index c60d009..0c92bbd 100644 --- a/lib/nncase/include/runtime/span_reader.h +++ b/lib/nncase/include/runtime/span_reader.h @@ -1,3 +1,17 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include diff --git a/lib/nncase/include/runtime/target_config.h b/lib/nncase/include/runtime/target_config.h deleted file mode 100644 index 381e5de..0000000 --- a/lib/nncase/include/runtime/target_config.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#define NNCASE_CONCAT_3(a, b, c) a/b/c -#define NNCASE_TARGET_HEADER_(target, name) -#define NNCASE_TARGET_HEADER(name) NNCASE_TARGET_HEADER_(NNCASE_TARGET, name) - -#include NNCASE_TARGET_HEADER(interpreter.h) - -namespace nncase -{ -namespace runtime -{ - using interpreter_t = nncase::targets::NNCASE_TARGET::interpreter; -} -} diff --git a/lib/nncase/include/runtime/target_interpreter.h b/lib/nncase/include/runtime/target_interpreter.h new file mode 100644 index 0000000..c6ccea8 --- /dev/null +++ b/lib/nncase/include/runtime/target_interpreter.h @@ -0,0 +1,28 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include "../target_config.h" + +// clang-format off +#include NNCASE_TARGET_HEADER(runtime,interpreter.h) +// clang-format on + +namespace nncase +{ +namespace runtime +{ + using interpreter_t = nncase::runtime::NNCASE_TARGET::interpreter; +} +} diff --git a/lib/nncase/include/runtime_op_utility.h b/lib/nncase/include/runtime_op_utility.h deleted file mode 100644 index 78fab1c..0000000 --- a/lib/nncase/include/runtime_op_utility.h +++ /dev/null @@ -1,70 +0,0 @@ -#pragma once -#include -#include - -namespace nncase -{ -namespace runtime -{ - inline size_t get_bytes(datatype_t type) - { - size_t element_size; - - switch (type) - { - case dt_float32: - element_size = 4; - break; - case dt_uint8: - element_size = 1; - break; - default: - assert(!"Not supported data type"); - } - - return element_size; - } - - template - uint8_t count_leading_zeros(T value) - { - uint8_t num_zeroes = 0; - for (int32_t i = Bits - 1; i >= 0; i--) - { - if ((value & (1ULL << i)) == 0) - ++num_zeroes; - else - break; - } - - return num_zeroes; - } - - template - T carry_shift(T value, uint8_t shift) - { - if (shift > 0) - { - value >>= shift - 1; - if (value & 0x1) - { - if (value < 0) - value = (value >> 1) - 1; - else - value = (value >> 1) + 1; - } - else - { - value >>= 1; - } - } - - return value; - } - - inline int32_t mul_and_carry_shift(int32_t value, int32_t mul, uint8_t shift) - { - return (int32_t)carry_shift((int64_t) value * mul, shift); - } -} -} diff --git a/lib/nncase/include/target_config.h b/lib/nncase/include/target_config.h new file mode 100644 index 0000000..ec91a53 --- /dev/null +++ b/lib/nncase/include/target_config.h @@ -0,0 +1,32 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include + +// clang-format off +#define NNCASE_STRINGFY(x) #x +#define NNCASE_CONCAT_2(a, b) a/b +#define NNCASE_CONCAT_3(a, b, c) NNCASE_CONCAT_2(NNCASE_CONCAT_2(a, b), c) +// clang-format on + +#define NNCASE_TARGET_HEADER_(prefix, target, name) +#define NNCASE_TARGET_HEADER(prefix, name) NNCASE_TARGET_HEADER_(prefix, NNCASE_TARGET, name) + +#ifndef NNCASE_NO_EXCEPTIONS +#include +#define NNCASE_THROW(exception, ...) throw exception(__VA_ARGS__) +#else +#define NNCASE_THROW(exception, ...) assert(0 && #exception) +#endif diff --git a/lib/nncase/include/targets/cpu/interpreter.h b/lib/nncase/include/targets/cpu/interpreter.h deleted file mode 100644 index 3289770..0000000 --- a/lib/nncase/include/targets/cpu/interpreter.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once -#include - -namespace nncase -{ -namespace targets -{ - namespace cpu - { - class interpreter : public runtime::interpreter_base - { - public: - using interpreter_base::interpreter_base; - }; - } -} -} diff --git a/lib/nncase/include/targets/node_body.h b/lib/nncase/include/targets/node_body.h deleted file mode 100644 index 7920fe5..0000000 --- a/lib/nncase/include/targets/node_body.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once -#include "../runtime/binary_writer.h" -#include "../runtime/span_reader.h" -#include - -namespace nncase -{ -namespace targets -{ - template - struct simple_node_body - { - void deserialize(runtime::span_reader &reader) - { - reader.read(static_cast(*this)); - } - - void serialize(runtime::binary_writer &writer) const - { - writer.write(static_cast(*this)); - } - }; -} -} diff --git a/lib/nncase/include/targets/target.h b/lib/nncase/include/targets/target.h new file mode 100644 index 0000000..6057ddd --- /dev/null +++ b/lib/nncase/include/targets/target.h @@ -0,0 +1,37 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include +#include +#include +#include +#include + +namespace nncase +{ +class target +{ +public: + virtual void fill_allocators(std::unordered_map &allocators, std::vector> &allocator_holders) = 0; + virtual void registry_codegen_ops() = 0; + virtual void registry_evaluator_ops() = 0; + virtual void add_default_transforms(std::vector> &transforms) = 0; + virtual void add_optimize1_transforms(std::vector> &transforms) = 0; + virtual void add_optimize2_transforms(std::vector> &transforms) = 0; + virtual void add_quantization_checkpoint_transforms(std::vector> &transforms) = 0; + virtual void add_quantization_transforms(ir::quantizer& quantizer, const quant_param_t& input_quant_param, std::vector> &transforms) = 0; +}; +} diff --git a/lib/nncase/nncase.cpp b/lib/nncase/nncase.cpp index 13115e4..0951146 100644 --- a/lib/nncase/nncase.cpp +++ b/lib/nncase/nncase.cpp @@ -1,4 +1,4 @@ -/* Copyright 2018 Canaan Inc. +/* Copyright 2019 Canaan Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,12 +13,26 @@ * limitations under the License. */ #include -#include +#include +#include #include using namespace nncase; using namespace nncase::runtime; +#define NNCASE_DEBUG 0 + +namespace +{ +void kpu_upload_dma(dmac_channel_number_t dma_ch, const uint8_t *src, uint8_t *dest, size_t input_size, plic_irq_callback_t callback, void *userdata) +{ + dmac_set_irq(dma_ch, callback, userdata, 1); + dmac_set_single_mode(dma_ch, (void *)src, (void *)dest, DMAC_ADDR_INCREMENT, DMAC_ADDR_INCREMENT, + DMAC_MSIZE_16, DMAC_TRANS_WIDTH_64, input_size / 8); + usleep(1); +} +} + class nncase_context { public: @@ -46,20 +60,46 @@ public: auto input = interpreter_.input_at(0); auto mem = interpreter_.memory_at(input); - std::copy(src, src + mem.size(), mem.begin()); - interpreter_.run(done_thunk, on_error_thunk, node_profile_thunk, this); - return 0; + if (input.memory_type == mem_main) + { + std::copy(src, src + mem.size(), mem.begin()); + interpreter_.run(done_thunk, on_error_thunk, node_profile_thunk, this); + return 0; + } + else if (input.memory_type == mem_k210_kpu) + { + auto shape = interpreter_.input_shape_at(0); + if (shape[3] % 64 == 0) + { + kpu_upload_dma(dma_ch, src, mem.data(), mem.size(), upload_done_thunk, this); + } + else + { + kernels::k210::kpu_upload(src, mem.data(), shape); + } + + return 0; + } + + return -1; } private: void on_done() { +#if NNCASE_DEBUG printf("Total: %fms\n", interpreter_.total_duration().count() / 1e6); +#endif if (done_callback_) done_callback_(userdata_); } + void on_upload_done() + { + interpreter_.run(done_thunk, on_error_thunk, node_profile_thunk, this); + } + static void done_thunk(void *userdata) { reinterpret_cast(userdata)->on_done(); @@ -67,12 +107,22 @@ private: static void on_error_thunk(const char *err, void *userdata) { +#if NNCASE_DEBUG printf("Fatal: %s\n", err); +#endif } static void node_profile_thunk(runtime_opcode op, std::chrono::nanoseconds duration, void *userdata) { +#if NNCASE_DEBUG printf("%s: %fms\n", node_opcode_names(op).data(), duration.count() / 1e6); +#endif + } + + static int upload_done_thunk(void *userdata) + { + reinterpret_cast(userdata)->on_upload_done(); + return 0; } private: diff --git a/lib/nncase/targets/cpu/cpu_ops.cpp b/lib/nncase/runtime/cpu/cpu_ops.cpp similarity index 85% rename from lib/nncase/targets/cpu/cpu_ops.cpp rename to lib/nncase/runtime/cpu/cpu_ops.cpp index d83fa84..fc4ccd3 100644 --- a/lib/nncase/targets/cpu/cpu_ops.cpp +++ b/lib/nncase/runtime/cpu/cpu_ops.cpp @@ -1,13 +1,27 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include #include -#include +#include using namespace nncase; using namespace nncase::runtime; namespace nncase { -namespace targets +namespace runtime { namespace cpu { diff --git a/lib/nncase/runtime/interpreter.cpp b/lib/nncase/runtime/interpreter.cpp index a993a1d..3505548 100644 --- a/lib/nncase/runtime/interpreter.cpp +++ b/lib/nncase/runtime/interpreter.cpp @@ -1,3 +1,17 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include #include #include diff --git a/lib/nncase/targets/k210/interpreter.cpp b/lib/nncase/runtime/k210/interpreter.cpp similarity index 54% rename from lib/nncase/targets/k210/interpreter.cpp rename to lib/nncase/runtime/k210/interpreter.cpp index 7155e14..2ecde0e 100644 --- a/lib/nncase/targets/k210/interpreter.cpp +++ b/lib/nncase/runtime/k210/interpreter.cpp @@ -1,8 +1,22 @@ -#include +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include using namespace nncase; using namespace nncase::runtime; -using namespace nncase::targets::k210; +using namespace nncase::runtime::k210; interpreter::interpreter() #if NNCASE_TARGET_K210_SIMULATOR diff --git a/lib/nncase/targets/k210/k210_ops.cpp b/lib/nncase/runtime/k210/k210_ops.cpp similarity index 81% rename from lib/nncase/targets/k210/k210_ops.cpp rename to lib/nncase/runtime/k210/k210_ops.cpp index d7a092d..cc2d772 100644 --- a/lib/nncase/targets/k210/k210_ops.cpp +++ b/lib/nncase/runtime/k210/k210_ops.cpp @@ -1,6 +1,20 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include +#include #include -#include #if !NNCASE_TARGET_K210_SIMULATOR #include #include @@ -8,7 +22,7 @@ using namespace nncase; using namespace nncase::runtime; -using namespace nncase::targets::k210; +using namespace nncase::runtime::k210; namespace { @@ -37,6 +51,7 @@ void kpu_conv2d_normal(kpu_layer_argument_t &layer, plic_irq_callback_t callback plic_irq_register(IRQN_AI_INTERRUPT, callback, userdata); plic_irq_enable(IRQN_AI_INTERRUPT); kpu_send_layer(layer); + usleep(1); } void kpu_conv2d_output(kpu_layer_argument_t &layer, dmac_channel_number_t dma_ch, uint8_t *dest, plic_irq_callback_t callback, void *userdata) @@ -60,12 +75,27 @@ int kpu_plic_thunk(void *userdata) (ctx.interpreter->*ctx.step)(); return 0; } + +void kpu_upload_dma(dmac_channel_number_t dma_ch, const uint8_t *src, uint8_t *dest, size_t input_size, plic_irq_callback_t callback, void *userdata) +{ + dmac_set_irq(dma_ch, callback, userdata, 1); + dmac_set_single_mode(dma_ch, (void *)src, (void *)dest, DMAC_ADDR_INCREMENT, DMAC_ADDR_INCREMENT, + DMAC_MSIZE_16, DMAC_TRANS_WIDTH_64, input_size / 8); + usleep(1); +} + +int kpu_dma_plic_thunk(void *userdata) +{ + auto &ctx = *reinterpret_cast(userdata); + (ctx.interpreter->*ctx.step)(); + return 0; +} #endif } namespace nncase { -namespace targets +namespace runtime { namespace k210 { @@ -73,6 +103,16 @@ namespace targets { auto input = interpreter.memory_at(options.input); auto output = interpreter.memory_at(options.output); +#if !NNCASE_TARGET_K210_SIMULATOR + if (options.in_shape[3] % 64 == 0) + { + auto &ctx = interpreter.context(); + ctx.interpreter = &interpreter; + ctx.step = step; + kpu_upload_dma(interpreter.dma_ch(), input.data(), output.data(), input.size(), kpu_dma_plic_thunk, &ctx); + return kcr_async; + } +#endif kernels::k210::kpu_upload(input.data(), output.data(), options.in_shape); return kcr_done; } diff --git a/lib/nncase/runtime/kernel_registry.cpp b/lib/nncase/runtime/kernel_registry.cpp index e7e1ffe..97ea038 100644 --- a/lib/nncase/runtime/kernel_registry.cpp +++ b/lib/nncase/runtime/kernel_registry.cpp @@ -1,20 +1,36 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include #include +#include #include -#include -#include -#include using namespace nncase; using namespace nncase::runtime; namespace nncase { -namespace targets +namespace runtime { #define BEGINE_DEFINE_TARGET(target) \ namespace target \ { +#define DEFINE_NEUTRAL_RUNTIME_OP(id, name, value) \ + kernel_call_result id(id##_options &, interpreter_t &, interpreter_step_t); #define DEFINE_RUNTIME_OP(target, id, name, value) \ kernel_call_result id(id##_options &, interpreter_t &, interpreter_step_t); @@ -23,6 +39,7 @@ namespace targets #include #undef BEGINE_DEFINE_TARGET +#undef DEFINE_NEUTRAL_RUNTIME_OP #undef DEFINE_RUNTIME_OP #undef END_DEFINE_TARGET } @@ -35,18 +52,26 @@ kernel_call_result runtime::call_kernel(runtime_opcode opcode, xtl::span #undef BEGINE_DEFINE_TARGET +#undef DEFINE_NEUTRAL_RUNTIME_OP #undef DEFINE_RUNTIME_OP #undef END_DEFINE_TARGET default: diff --git a/lib/nncase/targets/neutral/neutral_ops.cpp b/lib/nncase/runtime/neutral/neutral_ops.cpp similarity index 72% rename from lib/nncase/targets/neutral/neutral_ops.cpp rename to lib/nncase/runtime/neutral/neutral_ops.cpp index f240666..8ce72dd 100644 --- a/lib/nncase/targets/neutral/neutral_ops.cpp +++ b/lib/nncase/runtime/neutral/neutral_ops.cpp @@ -1,6 +1,20 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include #include -#include +#include using namespace nncase; using namespace nncase::runtime; @@ -23,7 +37,7 @@ using namespace nncase::runtime; namespace nncase { -namespace targets +namespace runtime { namespace neutral { @@ -51,6 +65,12 @@ namespace targets case binary_div: binary([](auto a, auto b) { return a / b; }); return kcr_done; + case binary_min: + binary([](auto a, auto b) { return std::min(a, b); }); + return kcr_done; + case binary_max: + binary([](auto a, auto b) { return std::max(a, b); }); + return kcr_done; default: return kcr_error; } @@ -146,6 +166,9 @@ namespace targets case reduce_max: reduce([](auto a, auto b) { return std::max(a, b); }); return kcr_done; + case reduce_sum: + reduce([](auto a, auto b) { return a + b; }); + return kcr_done; default: return kcr_error; } @@ -172,31 +195,33 @@ namespace targets case reduce_max: reduce([](auto a, auto b) { return std::max(a, b); }, [](auto v, auto k) { return v; }); return kcr_done; + case reduce_sum: + reduce([](auto a, auto b) { return a + b; }, [](auto v, auto k) { return v; }); + return kcr_done; default: return kcr_error; } } - kernel_call_result resize_bilinear(resize_bilinear_options &options, interpreter_t &interpreter, interpreter_step_t step) + kernel_call_result resize_image(resize_image_options &options, interpreter_t &interpreter, interpreter_step_t step) { auto input = interpreter.memory_at(options.input); auto output = interpreter.memory_at(options.output); - kernels::neutral::resize_bilinear(input.data(), output.data(), options.in_shape, options.out_h, options.out_w, options.align_corners); - return kcr_done; - } - - kernel_call_result resize_nearest_neighbor(resize_nearest_neighbor_options &options, interpreter_t &interpreter, runtime::interpreter_step_t step) - { - auto input = interpreter.memory_at(options.input); - auto output = interpreter.memory_at(options.output); - + if (options.mode == image_resize_bilinear) + { + kernels::neutral::resize_bilinear(input.data(), output.data(), options.in_shape, options.out_h, options.out_w, options.align_corners); + return kcr_done; + } + else + { #define RESIZE_NN_KERNEL(T) \ kernels::neutral::resize_nearest_neighbor(reinterpret_cast(input.data()), reinterpret_cast(output.data()), options.in_shape, options.out_h, options.out_w); - ELEM_SIZE_IMPL(options.input.datatype, RESIZE_NN_KERNEL); - return kcr_done; + ELEM_SIZE_IMPL(options.input.datatype, RESIZE_NN_KERNEL); + return kcr_done; #undef RESIZE_NN_KERNEL + } } kernel_call_result softmax(softmax_options &options, interpreter_t &interpreter, interpreter_step_t step) @@ -233,6 +258,52 @@ namespace targets return kcr_done; #undef STRIDED_SLICE_KERNEL } + + kernel_call_result unary(unary_options &options, interpreter_t &interpreter, interpreter_step_t step) + { + auto input = interpreter.memory_at(options.input); + auto output = interpreter.memory_at(options.output); + + auto unary = [&](auto unary_op) { + kernels::neutral::unary(input.data(), output.data(), input.size(), unary_op); + }; + + switch (options.unary_op) + { + case unary_abs: + unary([](auto a) { return fabs(a); }); + return kcr_done; + case unary_ceil: + unary([](auto a) { return ceilf(a); }); + return kcr_done; + case unary_cos: + unary([](auto a) { return cosf(a); }); + return kcr_done; + case unary_exp: + unary([](auto a) { return expf(a); }); + return kcr_done; + case unary_floor: + unary([](auto a) { return floorf(a); }); + return kcr_done; + case unary_log: + unary([](auto a) { return logf(a); }); + return kcr_done; + case unary_neg: + unary([](auto a) { return -a; }); + return kcr_done; + case unary_rsqrt: + unary([](auto a) { return 1.f / sqrtf(a); }); + return kcr_done; + case unary_sin: + unary([](auto a) { return sinf(a); }); + return kcr_done; + case unary_square: + unary([](auto a) { return a * a; }); + return kcr_done; + default: + return kcr_error; + } + } } } }