/* (web page residue, not part of the source) Code fetch complete; the page will refresh automatically. */
/*
* Copyright (C) 2016-2021 Canonical, Ltd.
* Copyright (C) 2022 Colin Ian King.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* This stressor is loosely based on the STREAM Sustainable
* Memory Bandwidth In High Performance Computers tool.
* https://www.cs.virginia.edu/stream/
* https://www.cs.virginia.edu/stream/FTP/Code/stream.c
*
* This is loosely based on a variant of the STREAM benchmark code,
* so DO NOT submit results based on this as it is intended to
* stress memory and compute and NOT intended for STREAM accurate
* tuned or non-tuned benchmarking whatsoever. I believe this
* conforms to section 3a, 3b of the original License.
*
*/
#include "stress-ng.h"
#include "core-cpu.h"
#include "core-nt-store.h"
/* Lower and upper bounds accepted for the stream-L3-size option */
#define MIN_STREAM_L3_SIZE (4 * KB)
#define MAX_STREAM_L3_SIZE (MAX_MEM_LIMIT)
/*
 * Initial value of the stream-L3-size setting; also the per-processor
 * size used when the cache size has to be guessed
 */
#define DEFAULT_STREAM_L3_SIZE (4 * MB)
#if defined(HAVE_NT_STORE_DOUBLE)
/*
 * Store src into dst by way of stress_nt_store_double(); only available
 * when HAVE_NT_STORE_DOUBLE is defined. dst must be an lvalue since its
 * address is taken.
 */
#define NT_STORE(dst, src) stress_nt_store_double(&dst, src)
#endif
/*
 * Plain assignment store. The arguments are not parenthesised, so the
 * macro is meant to be used as a complete statement.
 */
#define STORE(dst, src) dst = src
typedef struct {
const char *name;
const int advice;
} stress_stream_madvise_info_t;
/*
 * Help text for the stream stressor options, one { NULL, option,
 * description } entry per option, terminated by an all-NULL entry.
 * Options that take a value show a placeholder for it.
 */
static const stress_help_t help[] = {
	{ NULL,	"stream N",		"start N workers exercising memory bandwidth" },
	{ NULL,	"stream-ops N",		"stop after N bogo stream operations" },
	{ NULL,	"stream-index N",	"specify number of indices into the data (0..3)" },
	{ NULL,	"stream-l3-size N",	"specify the L3 cache size of the CPU" },
	{ NULL,	"stream-madvise M",	"specify mmap'd stream buffer madvise advice" },
	{ NULL,	NULL,			NULL }
};
/*
 * Table of accepted stream-madvise keywords and their advice values.
 * Only advice constants known at build time are listed; the table is
 * terminated by an entry with a NULL name.
 */
static const stress_stream_madvise_info_t stream_madvise_info[] = {
#if defined(HAVE_MADVISE)
#if defined(MADV_HUGEPAGE)
{ "hugepage", MADV_HUGEPAGE },
#endif
#if defined(MADV_NOHUGEPAGE)
{ "nohugepage", MADV_NOHUGEPAGE },
#endif
#if defined(MADV_NORMAL)
{ "normal", MADV_NORMAL },
#endif
#else
/* No MADVISE, default to normal, ignored */
{ "normal", 0 },
#endif
/* end of table marker */
{ NULL, 0 },
};
/*
 *  stress_set_stream_L3_size()
 *	parse the stream-L3-size option as a byte size, range check it
 *	against MIN_STREAM_L3_SIZE..MAX_STREAM_L3_SIZE and save it as the
 *	"stream-L3-size" setting; returns the result of saving the setting
 */
static int stress_set_stream_L3_size(const char *opt)
{
	const uint64_t l3_bytes = stress_get_uint64_byte(opt);

	stress_check_range_bytes("stream-L3-size", l3_bytes,
		MIN_STREAM_L3_SIZE, MAX_STREAM_L3_SIZE);

	return stress_set_setting("stream-L3-size", TYPE_ID_UINT64, &l3_bytes);
}
/*
 *  stress_set_stream_madvise()
 *	look up the stream-madvise option keyword in stream_madvise_info
 *	and save the matching advice value as the "stream-madvise" setting.
 *
 *	Returns the result of stress_set_setting() when the keyword is
 *	known (so a failure to save the setting is reported to the caller,
 *	as the other stream option setters do), or -1 after listing the
 *	allowed keywords on stderr when it is not.
 */
static int stress_set_stream_madvise(const char *opt)
{
	const stress_stream_madvise_info_t *info;

	for (info = stream_madvise_info; info->name; info++) {
		if (!strcmp(opt, info->name))
			return stress_set_setting("stream-madvise", TYPE_ID_INT, &info->advice);
	}

	/* Unknown keyword: tell the user what is accepted */
	(void)fprintf(stderr, "invalid stream-madvise advice '%s', allowed advice options are:", opt);
	for (info = stream_madvise_info; info->name; info++) {
		(void)fprintf(stderr, " %s", info->name);
	}
	(void)fprintf(stderr, "\n");

	return -1;
}
/*
 *  stress_set_stream_index()
 *	parse the stream-index option as an unsigned 32 bit value, range
 *	check it against 0..3 and save it as the "stream-index" setting;
 *	returns the result of saving the setting
 */
static int stress_set_stream_index(const char *opt)
{
	const uint32_t index_count = stress_get_uint32(opt);

	stress_check_range("stream-index", index_count, 0, 3);

	return stress_set_setting("stream-index", TYPE_ID_UINT32, &index_count);
}
/*
 *  stress_stream_copy_index0()
 *	copy: c[i] = a[i] for each of the n elements
 */
static inline void OPTIMIZE3 stress_stream_copy_index0(
	double *RESTRICT c,
	const double *RESTRICT a,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		STORE(c[k], a[k]);
	}
}
#if defined(HAVE_NT_STORE_DOUBLE)
/*
 *  stress_stream_copy_index0_nt()
 *	copy: c[i] = a[i] for each of the n elements, storing
 *	through NT_STORE rather than a plain assignment
 */
static inline void OPTIMIZE3 stress_stream_copy_index0_nt(
	double *RESTRICT c,
	const double *RESTRICT a,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		NT_STORE(c[k], a[k]);
	}
}
#endif
/*
 *  stress_stream_copy_index1()
 *	indexed copy: c[idx1[i]] = a[idx1[i]] for i in 0..n-1
 */
static inline void OPTIMIZE3 stress_stream_copy_index1(
	double *RESTRICT c,
	const double *RESTRICT a,
	const size_t *RESTRICT idx1,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		const size_t pos = idx1[k];

		STORE(c[pos], a[pos]);
	}
}
/*
 *  stress_stream_copy_index2()
 *	indexed copy: c[idx1[i]] = a[idx2[i]] for i in 0..n-1
 */
static inline void OPTIMIZE3 stress_stream_copy_index2(
	double *RESTRICT c,
	const double *RESTRICT a,
	const size_t *RESTRICT idx1,
	const size_t *RESTRICT idx2,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		const size_t dst = idx1[k];
		const size_t src = idx2[k];

		STORE(c[dst], a[src]);
	}
}
/*
 *  stress_stream_copy_index3()
 *	doubly indexed copy: c[idx3[idx1[i]]] = a[idx2[i]] for i in 0..n-1
 */
static inline void OPTIMIZE3 stress_stream_copy_index3(
	double *RESTRICT c,
	const double *RESTRICT a,
	const size_t *RESTRICT idx1,
	const size_t *RESTRICT idx2,
	const size_t *RESTRICT idx3,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		const size_t dst = idx3[idx1[k]];
		const size_t src = idx2[k];

		STORE(c[dst], a[src]);
	}
}
/*
 *  stress_stream_scale_index0()
 *	scale: b[i] = q * c[i] for each of the n elements
 */
static inline void OPTIMIZE3 stress_stream_scale_index0(
	double *RESTRICT b,
	const double *RESTRICT c,
	const double q,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		STORE(b[k], q * c[k]);
	}
}
#if defined(HAVE_NT_STORE_DOUBLE)
/*
 *  stress_stream_scale_index0_nt()
 *	scale: b[i] = q * c[i] for each of the n elements, storing
 *	through NT_STORE rather than a plain assignment
 */
static inline void OPTIMIZE3 stress_stream_scale_index0_nt(
	double *RESTRICT b,
	const double *RESTRICT c,
	const double q,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		NT_STORE(b[k], q * c[k]);
	}
}
#endif
/*
 *  stress_stream_scale_index1()
 *	indexed scale: b[idx1[i]] = q * c[idx1[i]] for i in 0..n-1
 */
static inline void OPTIMIZE3 stress_stream_scale_index1(
	double *RESTRICT b,
	const double *RESTRICT c,
	const double q,
	const size_t *RESTRICT idx1,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		const size_t pos = idx1[k];

		STORE(b[pos], q * c[pos]);
	}
}
/*
 *  stress_stream_scale_index2()
 *	indexed scale: b[idx1[i]] = q * c[idx2[i]] for i in 0..n-1
 */
static inline void OPTIMIZE3 stress_stream_scale_index2(
	double *RESTRICT b,
	const double *RESTRICT c,
	const double q,
	const size_t *RESTRICT idx1,
	const size_t *RESTRICT idx2,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		const size_t dst = idx1[k];
		const size_t src = idx2[k];

		STORE(b[dst], q * c[src]);
	}
}
/*
 *  stress_stream_scale_index3()
 *	doubly indexed scale: b[idx3[idx1[i]]] = q * c[idx2[i]]
 *	for i in 0..n-1
 */
static inline void OPTIMIZE3 stress_stream_scale_index3(
	double *RESTRICT b,
	const double *RESTRICT c,
	const double q,
	const size_t *RESTRICT idx1,
	const size_t *RESTRICT idx2,
	const size_t *RESTRICT idx3,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		const size_t dst = idx3[idx1[k]];
		const size_t src = idx2[k];

		STORE(b[dst], q * c[src]);
	}
}
/*
 *  stress_stream_add_index0()
 *	add: c[i] = a[i] + b[i] for each of the n elements
 */
static inline void OPTIMIZE3 stress_stream_add_index0(
	const double *RESTRICT a,
	const double *RESTRICT b,
	double *RESTRICT c,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		STORE(c[k], a[k] + b[k]);
	}
}
#if defined(HAVE_NT_STORE_DOUBLE)
/*
 *  stress_stream_add_index0_nt()
 *	add: c[i] = a[i] + b[i] for each of the n elements, storing
 *	through NT_STORE rather than a plain assignment
 */
static inline void OPTIMIZE3 stress_stream_add_index0_nt(
	const double *RESTRICT a,
	const double *RESTRICT b,
	double *RESTRICT c,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		NT_STORE(c[k], a[k] + b[k]);
	}
}
#endif
/*
 *  stress_stream_add_index1()
 *	indexed add: c[idx1[i]] = a[idx1[i]] + b[idx1[i]] for i in 0..n-1
 */
static inline void OPTIMIZE3 stress_stream_add_index1(
	const double *RESTRICT a,
	const double *RESTRICT b,
	double *RESTRICT c,
	const size_t *RESTRICT idx1,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		const size_t pos = idx1[k];

		STORE(c[pos], a[pos] + b[pos]);
	}
}
/*
 *  stress_stream_add_index2()
 *	indexed add: c[idx1[i]] = a[idx2[i]] + b[idx1[i]] for i in 0..n-1
 */
static inline void OPTIMIZE3 stress_stream_add_index2(
	const double *RESTRICT a,
	const double *RESTRICT b,
	double *RESTRICT c,
	const size_t *RESTRICT idx1,
	const size_t *RESTRICT idx2,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		const size_t first = idx1[k];
		const size_t second = idx2[k];

		STORE(c[first], a[second] + b[first]);
	}
}
/*
 *  stress_stream_add_index3()
 *	indexed add: c[idx1[i]] = a[idx2[i]] + b[idx3[i]] for i in 0..n-1
 */
static inline void OPTIMIZE3 stress_stream_add_index3(
	const double *RESTRICT a,
	const double *RESTRICT b,
	double *RESTRICT c,
	const size_t *RESTRICT idx1,
	const size_t *RESTRICT idx2,
	const size_t *RESTRICT idx3,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		const size_t first = idx1[k];
		const size_t second = idx2[k];
		const size_t third = idx3[k];

		STORE(c[first], a[second] + b[third]);
	}
}
/*
 *  stress_stream_triad_index0()
 *	triad: a[i] = b[i] + (c[i] * q) for each of the n elements
 */
static inline void OPTIMIZE3 stress_stream_triad_index0(
	double *RESTRICT a,
	const double *RESTRICT b,
	const double *RESTRICT c,
	const double q,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		STORE(a[k], b[k] + (c[k] * q));
	}
}
#if defined(HAVE_NT_STORE_DOUBLE)
/*
 *  stress_stream_triad_index0_nt()
 *	triad: a[i] = b[i] + (c[i] * q) for each of the n elements,
 *	storing through NT_STORE rather than a plain assignment
 */
static inline void OPTIMIZE3 stress_stream_triad_index0_nt(
	double *RESTRICT a,
	const double *RESTRICT b,
	const double *RESTRICT c,
	const double q,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		NT_STORE(a[k], b[k] + (c[k] * q));
	}
}
#endif
/*
 *  stress_stream_triad_index1()
 *	indexed triad: a[idx1[i]] = b[idx1[i]] + (c[idx1[i]] * q)
 *	for i in 0..n-1
 */
static inline void OPTIMIZE3 stress_stream_triad_index1(
	double *RESTRICT a,
	const double *RESTRICT b,
	const double *RESTRICT c,
	const double q,
	const size_t *RESTRICT idx1,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		const size_t pos = idx1[k];

		STORE(a[pos], b[pos] + (c[pos] * q));
	}
}
/*
 *  stress_stream_triad_index2()
 *	indexed triad: a[idx1[i]] = b[idx2[i]] + (c[idx1[i]] * q)
 *	for i in 0..n-1
 */
static inline void OPTIMIZE3 stress_stream_triad_index2(
	double *RESTRICT a,
	const double *RESTRICT b,
	const double *RESTRICT c,
	const double q,
	const size_t *RESTRICT idx1,
	const size_t *RESTRICT idx2,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		const size_t first = idx1[k];
		const size_t second = idx2[k];

		STORE(a[first], b[second] + (c[first] * q));
	}
}
/*
 *  stress_stream_triad_index3()
 *	indexed triad: a[idx1[i]] = b[idx2[i]] + (c[idx3[i]] * q)
 *	for i in 0..n-1
 */
static inline void OPTIMIZE3 stress_stream_triad_index3(
	double *RESTRICT a,
	const double *RESTRICT b,
	const double *RESTRICT c,
	const double q,
	const size_t *RESTRICT idx1,
	const size_t *RESTRICT idx2,
	const size_t *RESTRICT idx3,
	const uint64_t n)
{
	register uint64_t k;

	for (k = 0; k < n; k++) {
		const size_t first = idx1[k];
		const size_t second = idx2[k];
		const size_t third = idx3[k];

		STORE(a[first], b[second] + (c[third] * q));
	}
}
/*
 *  stress_stream_init_data()
 *	fill the n doubles in data with values from stress_mwc32()
 *	scaled by 1 / 2^32, i.e. each value lies in the range 0.0 to
 *	just under 1.0
 */
static void stress_stream_init_data(
	double *RESTRICT data,
	const uint64_t n)
{
	const double scale = 1.0 / (double)(4294967296ULL);
	uint64_t k;

	for (k = 0; k < n; k++) {
		const uint32_t rnd = stress_mwc32();

		STORE(data[k], (double)rnd * scale);
	}
}
static inline void *stress_stream_mmap(const stress_args_t *args, uint64_t sz)
{
void *ptr;
ptr = mmap(NULL, (size_t)sz, PROT_READ | PROT_WRITE,
#if defined(MAP_POPULATE)
MAP_POPULATE |
#endif
#if defined(HAVE_MADVISE)
MAP_PRIVATE |
#else
MAP_SHARED |
#endif
MAP_ANONYMOUS, -1, 0);
/* Coverity Scan believes NULL can be returned, doh */
if (!ptr || (ptr == MAP_FAILED)) {
pr_err("%s: cannot allocate %" PRIu64 " bytes\n",
args->name, sz);
ptr = MAP_FAILED;
} else {
#if defined(HAVE_MADVISE)
int ret, advice = MADV_NORMAL;
(void)stress_get_setting("stream-madvise", &advice);
ret = madvise(ptr, sz, advice);
(void)ret;
#else
UNEXPECTED
#endif
}
return ptr;
}
/*
 *  get_stream_L3_size()
 *	return the size of the highest level CPU cache that can be
 *	found, or the built-in MEM_CACHE_SIZE default when the cache
 *	details, a suitable cache or its size cannot be determined (and
 *	always on non-Linux builds). Informational messages are only
 *	emitted by instance 0.
 */
static inline uint64_t get_stream_L3_size(const stress_args_t *args)
{
	uint64_t cache_size = MEM_CACHE_SIZE;
	const bool first_instance = (args->instance == 0);
#if defined(__linux__)
	stress_cpus_t *cpu_caches = stress_get_all_cpu_cache_details();
	stress_cpu_cache_t *cache;
	uint16_t max_cache_level;

	if (cpu_caches == NULL) {
		if (first_instance)
			pr_inf("%s: using built-in defaults as unable to "
				"determine cache details\n", args->name);
		return cache_size;
	}

	max_cache_level = stress_get_max_cache_level(cpu_caches);
	if (first_instance && (max_cache_level > 0) && (max_cache_level < 3))
		pr_inf("%s: no L3 cache, using L%" PRIu16 " size instead\n",
			args->name, max_cache_level);

	cache = stress_get_cpu_cache(cpu_caches, max_cache_level);
	if (cache == NULL) {
		if (first_instance)
			pr_inf("%s: using built-in defaults as no suitable "
				"cache found\n", args->name);
	} else if (cache->size == 0) {
		if (first_instance)
			pr_inf("%s: using built-in defaults as unable to "
				"determine cache size\n", args->name);
	} else {
		cache_size = cache->size;
	}
	/* cache details are released on every path once they were obtained */
	stress_free_cpu_caches(cpu_caches);
#else
	if (first_instance)
		pr_inf("%s: using built-in defaults as unable to "
			"determine cache details\n", args->name);
#endif
	return cache_size;
}
/*
 *  stress_stream_init_index()
 *	fill idx with 0..n-1 and then shuffle it: each position in turn
 *	is swapped with a position picked by stress_mwc64() % n.
 *	n must be non-zero when the shuffle runs (modulo by n).
 */
static void stress_stream_init_index(
	size_t *RESTRICT idx,
	const uint64_t n)
{
	uint64_t pos;

	/* identity mapping first */
	for (pos = 0; pos < n; pos++) {
		idx[pos] = pos;
	}

	/* then swap every slot with a randomly chosen one */
	for (pos = 0; pos < n; pos++) {
		register const uint64_t other = stress_mwc64() % n;
		register const uint64_t saved = idx[pos];

		idx[pos] = idx[other];
		idx[other] = saved;
	}
}
/*
 *  stress_stream()
 *	stress cache/memory/CPU with stream stressors
 *
 *	Maps three arrays of doubles, each 4 x the per-instance cache
 *	size, plus one index array per level requested by the
 *	"stream-index" setting, then repeats the copy, scale, add and
 *	triad passes until keep_stressing() returns false.
 *
 *	Returns EXIT_SUCCESS once the run has completed, EXIT_FAILURE
 *	if any of the buffers could not be mapped. All buffers that were
 *	mapped are unmapped on every exit path.
 */
static int stress_stream(const stress_args_t *args)
{
	int rc = EXIT_FAILURE;
	/* MAP_FAILED marks "not mapped" so the cleanup can tell what to release */
	double *a = MAP_FAILED, *b = MAP_FAILED, *c = MAP_FAILED;
	/* NULL marks "not mapped" for the optional index arrays */
	size_t *idx1 = NULL, *idx2 = NULL, *idx3 = NULL;
	const double q = 3.0;
	double mb_rate, mb, fp_rate, fp, t1, t2, dt;
	uint32_t stream_index = 0;
	uint64_t L3, sz, n, sz_idx;
	uint64_t stream_L3_size = DEFAULT_STREAM_L3_SIZE;
	bool guess = false;
#if defined(HAVE_NT_STORE_DOUBLE)
	const bool has_sse2 = stress_cpu_x86_has_sse2();
#endif

	/* A user supplied size takes priority over the detected one */
	if (stress_get_setting("stream-L3-size", &stream_L3_size))
		L3 = stream_L3_size;
	else
		L3 = get_stream_L3_size(args);

	(void)stress_get_setting("stream-index", &stream_index);

	/* Have to take a hunch and badly guess size */
	if (!L3) {
		guess = true;
		L3 = (uint64_t)stress_get_processors_configured() * DEFAULT_STREAM_L3_SIZE;
	}

	if (args->instance == 0) {
		pr_inf("%s: stressor loosely based on a variant of the "
			"STREAM benchmark code\n", args->name);
		pr_inf("%s: do NOT submit any of these results "
			"to the STREAM benchmark results\n", args->name);
		if (guess) {
			pr_inf("%s: cannot determine CPU L3 cache size, "
				"defaulting to %" PRIu64 "K\n",
				args->name, L3 / 1024);
		} else {
			pr_inf("%s: Using CPU cache size of %" PRIu64 "K\n",
				args->name, L3 / 1024);
		}
	}

	/* ..and shared amongst all the STREAM stressor instances */
	L3 /= args->num_instances;
	if (L3 < args->page_size)
		L3 = args->page_size;

	/*
	 *  Each array must be at least 4 x the
	 *  size of the L3 cache
	 */
	sz = (L3 * 4);
	n = sz / sizeof(*a);
	sz_idx = n * sizeof(size_t);

	a = stress_stream_mmap(args, sz);
	if (a == MAP_FAILED)
		goto tidy;
	b = stress_stream_mmap(args, sz);
	if (b == MAP_FAILED)
		goto tidy;
	c = stress_stream_mmap(args, sz);
	if (c == MAP_FAILED)
		goto tidy;

	/*
	 *  Index level N needs idx1..idxN, so each case falls through
	 *  to map the lower levels too. A failed mapping is reset to
	 *  NULL so the cleanup only unmaps what really was mapped.
	 */
	switch (stream_index) {
	case 3:
		idx3 = stress_stream_mmap(args, sz_idx);
		if (idx3 == MAP_FAILED) {
			idx3 = NULL;
			goto tidy;
		}
		stress_stream_init_index(idx3, n);
		CASE_FALLTHROUGH;
	case 2:
		idx2 = stress_stream_mmap(args, sz_idx);
		if (idx2 == MAP_FAILED) {
			idx2 = NULL;
			goto tidy;
		}
		stress_stream_init_index(idx2, n);
		CASE_FALLTHROUGH;
	case 1:
		idx1 = stress_stream_mmap(args, sz_idx);
		if (idx1 == MAP_FAILED) {
			idx1 = NULL;
			goto tidy;
		}
		stress_stream_init_index(idx1, n);
		CASE_FALLTHROUGH;
	case 0:
	default:
		break;
	}

	stress_stream_init_data(a, n);
	stress_stream_init_data(b, n);
	stress_stream_init_data(c, n);

	stress_set_proc_state(args->name, STRESS_STATE_RUN);

	t1 = stress_time_now();
	do {
		switch (stream_index) {
		case 3:
			stress_stream_copy_index3(c, a, idx1, idx2, idx3, n);
			stress_stream_scale_index3(b, c, q, idx1, idx2, idx3, n);
			stress_stream_add_index3(c, b, a, idx1, idx2, idx3, n);
			stress_stream_triad_index3(a, b, c, q, idx1, idx2, idx3, n);
			break;
		case 2:
			stress_stream_copy_index2(c, a, idx1, idx2, n);
			stress_stream_scale_index2(b, c, q, idx1, idx2, n);
			stress_stream_add_index2(c, b, a, idx1, idx2, n);
			stress_stream_triad_index2(a, b, c, q, idx1, idx2, n);
			break;
		case 1:
			stress_stream_copy_index1(c, a, idx1, n);
			stress_stream_scale_index1(b, c, q, idx1, n);
			stress_stream_add_index1(c, b, a, idx1, n);
			stress_stream_triad_index1(a, b, c, q, idx1, n);
			break;
		case 0:
		default:
#if defined(HAVE_NT_STORE_DOUBLE)
			if (has_sse2) {
				stress_stream_copy_index0_nt(c, a, n);
				stress_stream_scale_index0_nt(b, c, q, n);
				stress_stream_add_index0_nt(c, b, a, n);
				stress_stream_triad_index0_nt(a, b, c, q, n);
				break;
			}
#endif
			stress_stream_copy_index0(c, a, n);
			stress_stream_scale_index0(b, c, q, n);
			stress_stream_add_index0(c, b, a, n);
			stress_stream_triad_index0(a, b, c, q, n);
			break;
		}
		inc_counter(args);
	} while (keep_stressing(args));
	t2 = stress_time_now();

	/*
	 *  Each bogo-op is accounted as 10 x sz bytes of memory traffic
	 *  and 4 x sz floating point operations.
	 *  NOTE(review): the flop figure is scaled by sz (bytes) rather
	 *  than n (elements) - confirm this is intended.
	 */
	mb = ((double)(get_counter(args) * 10) * (double)sz) / (double)MB;
	fp = ((double)(get_counter(args) * 4) * (double)sz) / (double)MB;
	dt = t2 - t1;
	/* Rates are only reported for runs of at least 4.5 seconds */
	if (dt >= 4.5) {
		mb_rate = mb / (dt);
		fp_rate = fp / (dt);
		pr_inf("%s: memory rate: %.2f MB/sec, %.2f Mflop/sec"
			" (instance %" PRIu32 ")\n",
			args->name, mb_rate, fp_rate, args->instance);
		stress_misc_stats_set(args->misc_stats, 0, "memory rate (MB per sec)", mb_rate);
		stress_misc_stats_set(args->misc_stats, 1, "memory rate (Mflop per sec)", fp_rate);
	} else {
		if (args->instance == 0)
			pr_inf("%s: run duration too short to determine memory rate\n", args->name);
	}

	rc = EXIT_SUCCESS;

tidy:
	/* Single exit path: release whatever was mapped, newest first */
	stress_set_proc_state(args->name, STRESS_STATE_DEINIT);

	if (idx1)
		(void)munmap((void *)idx1, sz_idx);
	if (idx2)
		(void)munmap((void *)idx2, sz_idx);
	if (idx3)
		(void)munmap((void *)idx3, sz_idx);
	if (c != MAP_FAILED)
		(void)munmap((void *)c, sz);
	if (b != MAP_FAILED)
		(void)munmap((void *)b, sz);
	if (a != MAP_FAILED)
		(void)munmap((void *)a, sz);

	return rc;
}
/*
 * Pairs each stream option identifier with the function that parses
 * and stores its argument; terminated by a { 0, NULL } entry.
 */
static const stress_opt_set_func_t opt_set_funcs[] = {
{ OPT_stream_index, stress_set_stream_index },
{ OPT_stream_l3_size, stress_set_stream_L3_size },
{ OPT_stream_madvise, stress_set_stream_madvise },
{ 0, NULL }
};
/*
 * Stressor descriptor for the stream stressor: ties together the
 * stress_stream() entry point, its classes, option setters and help.
 */
stressor_info_t stress_stream_info = {
.stressor = stress_stream,
.class = CLASS_CPU | CLASS_CPU_CACHE | CLASS_MEMORY,
.opt_set_funcs = opt_set_funcs,
.help = help
};
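/*
 * (web page residue, not part of the source)
 * This location may contain content that is not suitable for display, so the
 * page does not show it. You can review and amend it with the editing tools.
 * If you confirm the content involves no inappropriate language, pure
 * advertising, violence, vulgar or pornographic material, infringement,
 * piracy, false or worthless content, or anything that violates national laws
 * and regulations, you can click submit to appeal and it will be handled as
 * soon as possible.
 */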