498 lines
16 KiB
Plaintext
498 lines
16 KiB
Plaintext
Memory accesses should be bounded to prevent buffer overflows.
|
|
|
|
== Why is this an issue?
|
|
|
|
Array overruns and buffer overflows occur when a memory access accidentally goes beyond the boundary of the allocated array or buffer.
|
|
|
|
These overreaching accesses cause some of the most damaging and hard to track defects.
|
|
Not only do these faulty accesses constitute undefined behavior, but they frequently introduce security vulnerabilities, too.
|
|
|
|
|
|
== What is the potential impact?
|
|
|
|
Accessing memory that goes beyond the boundary of the allocated array or buffer results in undefined behavior.
|
|
|
|
For programs that exercise undefined behavior, the compiler is no longer bound by the language specification.
|
|
The application may crash or, even worse, the application may appear to execute correctly while losing data or producing incorrect results.
|
|
|
|
Out-of-bounds memory accesses cause some of the most damaging and hard-to-track defects.
|
|
Unbounded memory accesses may also introduce severe security vulnerabilities.
|
|
In case a user of the program can control the contents of a buffer, for instance, a malicious attacker can use the buffer overflow to overwrite critical data, leak secret or private information, execute arbitrary code, or gain unauthorized access to the system.
|
|
|
|
|
|
== How to fix it
|
|
|
|
Ensure that memory accesses are within the bounds of the allocated array or buffer.
|
|
|
|
If {cpp} is available, use the `std::array` or `std::vector` types to manage stack- or heap-based arrays, respectively, and use `std::string` to manage string buffers.
|
|
|
|
|
|
=== Code examples
|
|
|
|
==== Noncompliant code example
|
|
|
|
[source,c,diff-id=1,diff-type=noncompliant]
|
|
----
|
|
void access_exceeds(void) {
|
|
int id_sequence[3];
|
|
id_sequence[0] = 100;
|
|
id_sequence[1] = 200;
|
|
id_sequence[2] = 300;
|
|
id_sequence[3] = 400; // Noncompliant: memory access is out of bounds
|
|
// Accessed memory exceeds upper limit of memory block
|
|
}
|
|
|
|
void access_precedes(int x) {
|
|
int buf[100];
|
|
int *p = buf;
|
|
--p;
|
|
p[0] = 9001; // Noncompliant: memory access is out of bounds
|
|
// Accessed memory precedes memory block
|
|
}
|
|
----
|
|
|
|
==== Compliant solution
|
|
|
|
[source,c,diff-id=1,diff-type=compliant]
|
|
----
|
|
void access_exceeds(void) {
|
|
int id_sequence[3];
|
|
// Compliant: all memory accesses are within valid bounds between 0 and 2
|
|
id_sequence[0] = 100;
|
|
id_sequence[1] = 200;
|
|
id_sequence[2] = 300;
|
|
}
|
|
|
|
void access_precedes(int x) {
|
|
int buf[100];
|
|
int *p = buf;
|
|
p[0] = 9001; // Compliant: memory access within valid bounds
|
|
}
|
|
----
|
|
|
|
==== Noncompliant code example
|
|
|
|
[source,c,diff-id=2,diff-type=noncompliant]
|
|
----
|
|
#include <string.h>
|
|
|
|
void memcpy_fun(void) {
|
|
char src[] = {1, 2, 3, 4};
|
|
char dst[10];
|
|
memcpy(dst, src, 5); // Noncompliant: memory copy function accesses out-of-bound array element
|
|
}
|
|
----
|
|
|
|
==== Compliant solution
|
|
|
|
[source,c,diff-id=2,diff-type=compliant]
|
|
----
|
|
#include <string.h>
|
|
|
|
void memcpy_fun(void) {
|
|
char src[] = {1, 2, 3, 4};
|
|
char dst[10];
|
|
memcpy(dst, src, sizeof(src)); // Compliant: memory copy function accesses only valid array elements
|
|
}
|
|
----
|
|
|
|
==== Noncompliant code example
|
|
|
|
[source,c,diff-id=3,diff-type=noncompliant]
|
|
----
|
|
#include <stdio.h>
|
|
|
|
int fun(void) {
|
|
char buffer[64] = {0};
|
|
if (fgets(buffer, 2048, stdin) != NULL) { // Noncompliant: target buffer may overflow
|
|
printf("buffer contents: %s", buffer);
|
|
return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
----
|
|
|
|
==== Compliant solution
|
|
|
|
[source,c,diff-id=3,diff-type=compliant]
|
|
----
|
|
#include <stdio.h>
|
|
|
|
int fun(void) {
|
|
char buffer[64] = {0};
|
|
if (fgets(buffer, 64, stdin) != NULL) { // Compliant: reads at most 64 - 1 characters
|
|
printf("buffer contents: %s", buffer);
|
|
return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
----
|
|
|
|
==== Noncompliant code example
|
|
|
|
[source,c,diff-id=4,diff-type=noncompliant]
|
|
----
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
int foo(void) {
|
|
const char *const str = "AAAA"
|
|
"BBBB"
|
|
"CCCC"
|
|
"DDDD";
|
|
size_t str_len = strlen(str);
|
|
char *buf1 = (char *)malloc(str_len + /*null terminator*/ 1);
|
|
if (buf1 == NULL) { return 1; }
|
|
memset(buf1, 0, str_len + 1);
|
|
strncpy(buf1, str, str_len);
|
|
char buf2[8] = {0};
|
|
memcpy(buf2, buf1, str_len + 1); // Noncompliant: buf2 will overflow.
|
|
printf("buf1: %s\n", buf1);
|
|
printf("buf2: %s\n", buf2);
|
|
free(buf1);
|
|
return 0;
|
|
}
|
|
----
|
|
|
|
==== Compliant solution
|
|
|
|
[source,c,diff-id=4,diff-type=compliant]
|
|
----
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
int foo(void) {
|
|
const char *const str = "AAAA"
|
|
"BBBB"
|
|
"CCCC"
|
|
"DDDD";
|
|
size_t str_len = strlen(str);
|
|
char *buf1 = (char *)malloc(str_len + /*null terminator*/ 1);
|
|
if (buf1 == NULL) { return 1; }
|
|
memset(buf1, 0, str_len + 1);
|
|
strncpy(buf1, str, str_len + 1);
|
|
char buf2[8] = {0};
|
|
// Compliant: copy only `sizeof(buf2) - 1` bytes and leave the last
|
|
// terminating null byte ('\0') untouched such that `buf2` can be correctly
|
|
// printed in the subsequent lines.
|
|
memcpy(buf2, buf1, sizeof(buf2) - 1);
|
|
printf("buf1: %s\n", buf1);
|
|
printf("buf2: %s\n", buf2);
|
|
free(buf1);
|
|
return 0;
|
|
}
|
|
----
|
|
|
|
==== Noncompliant code example
|
|
|
|
[source,c,diff-id=5,diff-type=noncompliant]
|
|
----
|
|
#include <stdio.h>
|
|
|
|
void bar(void) {
|
|
int array[8] = {1, 2, 3, 4, 5, 6, 7, 8};
|
|
array[8] = 42; // Noncompliant: index should be between 0 and 7
|
|
for (size_t i = 0; i < 8; ++i) {
|
|
printf("%d\n", array[i]); // Compliant: index is between 0 and 7
|
|
}
|
|
}
|
|
----
|
|
|
|
==== Compliant solution
|
|
|
|
[source,c,diff-id=5,diff-type=compliant]
|
|
----
|
|
#include <stdio.h>
|
|
|
|
void bar(void) {
|
|
int array[8] = {1, 2, 3, 4, 5, 6, 7, 8};
|
|
array[7] = 42; // Compliant: index is between 0 and 7
|
|
for (size_t i = 0; i < 8; ++i) {
|
|
printf("%d\n", array[i]); // Compliant: index is between 0 and 7
|
|
}
|
|
}
|
|
----
|
|
|
|
=== Pitfalls
|
|
|
|
When managing and manipulating string buffers one needs to take great care that the buffers are correctly terminated with a null byte (`'\0'`).
|
|
Failing to correctly null terminate string buffers almost always causes buffer overruns and introduces undefined behavior.
|
|
|
|
The following faulty program aims at creating a string copy.
|
|
However, the heap-allocated memory buffer for holding the copy is too small since `strlen` calculates the length of its input string but excludes the "invisible" terminating null byte (`'\0'`).
|
|
The call to `strncpy` also uses the string length computed by `strlen` and the string copy hence lacks an implicitly copied null terminator.
|
|
This causes out-of-bounds reads and introduces undefined behavior in the subsequent processing steps.
|
|
In this example, the call to `printf` will eventually trigger such erroneous behavior.
|
|
|
|
[source,c,diff-id=6,diff-type=noncompliant]
|
|
----
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
char *make_str_copy(const char *const src) {
|
|
size_t src_len = strlen(src);
|
|
char *dst = (char *)malloc(src_len); // buffer too small to hold null terminator
|
|
if (dst == NULL) {
|
|
perror("malloc failed");
|
|
exit(1);
|
|
}
|
|
strncpy(dst, src, src_len); // null terminator is not (implicitly) copied either
|
|
return dst;
|
|
}
|
|
|
|
int main(void) {
|
|
const char str[] = "Hello, World!";
|
|
char *str_copy = make_str_copy(str);
|
|
// call to `printf` will trigger undefined behavior due to missing null terminator
|
|
printf("%s\n", str_copy); // Noncompliant: `str_copy` has not been null terminated
|
|
// More code that processes `str_copy` ...
|
|
free(str_copy);
|
|
return 0;
|
|
}
|
|
----
|
|
|
|
The following fixed version of the program does correctly allocate a heap-based buffer of sufficient size _and_ (explicitly) terminates it with a null byte (`'\0'`):
|
|
|
|
[source,c,diff-id=6,diff-type=compliant]
|
|
----
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
char *make_str_copy(const char *const src) {
|
|
size_t src_len = strlen(src);
|
|
// `src_len + 1` since `strlen` _excludes_ the terminating null byte
|
|
char *dst = (char *)malloc(src_len + 1);
|
|
if (dst == NULL) {
|
|
perror("malloc failed");
|
|
exit(1);
|
|
}
|
|
dst[src_len] = '\0'; // explicitly add a null terminator
|
|
strncpy(dst, src, src_len);
|
|
return dst;
|
|
}
|
|
|
|
int main(void) {
|
|
const char str[] = "Hello, World!";
|
|
char *str_copy = make_str_copy(str);
|
|
printf("%s\n", str_copy); // Compliant: `str_copy` has been correctly null terminated
|
|
// More code that processes `str_copy` ...
|
|
free(str_copy);
|
|
return 0;
|
|
}
|
|
----
|
|
|
|
|
|
=== Going the extra mile
|
|
|
|
Buffer overflows occur when a program writes data beyond the boundaries of a buffer and can lead to memory corruption and potential security vulnerabilities.
|
|
To mitigate this risk, developers must carefully manage array and buffer sizes.
|
|
This includes using secure coding practices, and employing techniques like input validation and bounds checking.
|
|
|
|
Various modern C and {cpp} compilers allow one to automatically instrument a program during compilation using so-called _sanitizers_.
|
|
The address and undefined behavior sanitizers, for instance, aim at detecting out-of-bounds memory accesses and undefined behavior, respectively.
|
|
If the instrumentation detects an error at runtime, the program will abort with an error message that provides valuable information for identifying and fixing the error.
|
|
|
|
Additional capabilities of modern compilers that aim at hardening the binary include the `FORTIFY_SOURCE` compilation flag, or features such as stack canaries or address space layout randomization (ASLR).
|
|
These hardening features provide some lightweight support for detecting buffer overflows.
|
|
|
|
Yet another programming tool for memory debugging is Valgrind.
|
|
Valgrind inspects a program during its execution and is capable of detecting various memory-related issues including buffer overflows.
|
|
|
|
In {cpp}, manual manipulations of arrays or strings (i.e., buffers) are considered a code smell.
|
|
|
|
Instead, the `std::array` type should be used to manage stack-based arrays, and the `std::vector` type should be used if a heap-based array is desired.
|
|
Besides always carrying their respective sizes, i.e., number of elements, `std::array` and `std::vector` implement many useful member functions such as `begin()` and `end()`, allowing one to safely and conveniently process them using algorithms from {cpp}'s `<algorithm>` header, for instance.
|
|
An example is shown in the following:
|
|
|
|
[source,cpp]
|
|
----
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <iostream>
|
|
#include <numeric>
|
|
#include <vector>
|
|
|
|
void bar() {
|
|
// stack-based array
|
|
std::array<int, 8> stack_buf;
|
|
std::fill(stack_buf.begin(), stack_buf.end(), 42);
|
|
for (auto i : stack_buf) {
|
|
std::cout << i << ' ';
|
|
}
|
|
std::cout << '\n';
|
|
std::cout << "sum of stack_buf's values: "
|
|
<< std::accumulate(stack_buf.begin(), stack_buf.end(), 0) << '\n';
|
|
// heap-based array
|
|
std::vector<int> heap_buf = {1, 2, 3, 4};
|
|
heap_buf.resize(10);
|
|
std::iota(heap_buf.begin(), heap_buf.end(), 1);
|
|
std::cout << "sum of heap_buf's values: "
|
|
<< std::accumulate(heap_buf.begin(), heap_buf.end(), 0) << '\n';
|
|
}
|
|
----
|
|
|
|
The `std::string` type should be used to manage string buffers since it facilitates safe buffer manipulations.
|
|
Instead of manually concatenating two buffers using `strncat`, for instance, `std::string` allows this operation to be performed in a much more convenient manner as shown in the following code:
|
|
|
|
[source,cpp]
|
|
----
|
|
#include <iostream>
|
|
#include <string>
|
|
|
|
void buz(std::string const &s) {
|
|
std::string t = "Hello, " + s;
|
|
std::cout << t << '\n';
|
|
}
|
|
----
|
|
|
|
In addition, the `std::format` function allows one to format strings according to a user-specified format and returns the result as a string as shown in what follows:
|
|
|
|
[source, cpp]
|
|
----
|
|
#include <format>
|
|
#include <iostream>
|
|
#include <string>
|
|
|
|
void tar(std::string const &s) {
|
|
std::string t = std::format("Hello, World! Greetings {}\n", s);
|
|
std::cout << t << '\n';
|
|
}
|
|
----
|
|
|
|
|
|
== Resources
|
|
|
|
=== Articles & blog posts
|
|
|
|
* Clang Hardening - https://blog.quarkslab.com/clang-hardening-cheat-sheet.html[Clang Hardening Cheat Sheet]
|
|
* FORTIFY_SOURCE - https://www.redhat.com/en/blog/enhance-application-security-fortifysource[Enhance application security with FORTIFY_SOURCE]
|
|
* Stack Protection - https://developers.redhat.com/articles/2022/06/02/use-compiler-flags-stack-protection-gcc-and-clang[Use compiler flags for stack protection in GCC and Clang]
|
|
* Valgrind - https://valgrind.org/docs/manual/quick-start.html[The Valgrind Quick Start Guide]
|
|
|
|
=== Conference presentations
|
|
|
|
* CppCon 2014 - https://youtu.be/V2_80g0eOMc?si=U_qv9iBKI5B3a_EL[Sanitize your {cpp} code]
|
|
* CppCon 2018 - https://youtu.be/0S0QgQd75Sw?si=AW9mA09L5PEbkqXc[Software Vulnerabilities in C and {cpp}]
|
|
* CppCon 2020 - https://youtu.be/xEzfnbTabyE?si=9yJQkrcRKn6tuPaV[2020: The Year of Sanitizers?]
|
|
|
|
=== Standards
|
|
|
|
* CERT - https://wiki.sei.cmu.edu/confluence/x/wtYxBQ[ARR30-C. Do not form or use out-of-bounds pointers or array subscripts]
|
|
* CERT - https://wiki.sei.cmu.edu/confluence/x/i3w-BQ[STR50-CPP. Guarantee that storage for strings has sufficient space for character data and the null terminator]
|
|
* CWE - https://cwe.mitre.org/data/definitions/119[CWE-119 Improper Restriction of Operations within the Bounds of a Memory Buffer]
|
|
* CWE - https://cwe.mitre.org/data/definitions/121[CWE-121 Stack-based Buffer Overflow]
|
|
* CWE - https://cwe.mitre.org/data/definitions/122[CWE-122 Heap-based Buffer Overflow]
|
|
* CWE - https://cwe.mitre.org/data/definitions/131[CWE-131 Incorrect Calculation of Buffer Size]
|
|
* CWE - https://cwe.mitre.org/data/definitions/193[CWE-193 Off-by-one Error]
|
|
* CWE - https://cwe.mitre.org/data/definitions/788[CWE-788 Access of Memory Location After End of Buffer]
|
|
|
|
=== Related rules
|
|
|
|
* S5782 ensures that POSIX functions are not called with arguments that trigger buffer overflows
|
|
* S5945 discourages the use of C-style arrays and suggests the use of `std::array` or `std::vector`
|
|
|
|
|
|
ifdef::env-github,rspecator-view[]
|
|
|
|
'''
|
|
== Implementation Specification
|
|
(visible only on this page)
|
|
|
|
=== Message
|
|
|
|
* Review this array access; it is likely to be an overrun.
|
|
* Review this memory access; it is likely to create an overflow.
|
|
|
|
|
|
=== Highlighting
|
|
|
|
Primary: Array access or function call
|
|
|
|
Secondary: * Index value for arrays
|
|
|
|
* Length value for function calls
|
|
* For loop end condition if applicable
|
|
|
|
|
|
'''
|
|
== Comments And Links
|
|
(visible only on this page)
|
|
|
|
Possible messages of the rule S3519:
|
|
|
|
* alpha.security.ArrayBoundV2
|
|
[source,cpp]
|
|
----
|
|
void access_exceeds(void) {
|
|
int id_sequence[3];
|
|
id_sequence[0] = 123;
|
|
id_sequence[1] = 234;
|
|
id_sequence[2] = 345;
|
|
id_sequence[3] = 456; // Noncompliant: accessing out of bounds.
|
|
// Out of bound memory access (access exceeds upper limit of memory block)
|
|
}
|
|
void access_precedes(int x) {
|
|
int buf[100];
|
|
int *p = buf;
|
|
--p;
|
|
p[0] = 1; // Out of bound memory access (accessed memory precedes memory block)
|
|
}
|
|
int getchar(void);
|
|
void access_tainted(void) {
|
|
int m = getchar();
|
|
Buffer[m] = 1; // Out of bound memory access (index is tainted)
|
|
}
|
|
----
|
|
|
|
* alpha.security.ReturnPtrRange
|
|
[source,cpp]
|
|
----
|
|
int *test_idx_sym(int i) {
|
|
static int arr[10];
|
|
if (i != 40)
|
|
return arr;
|
|
return arr + i; // Returned pointer value points outside the original object
|
|
}
|
|
----
|
|
|
|
* alpha.unix.cstring.OutOfBounds
|
|
[source,cpp]
|
|
----
|
|
char* my_calloc(int n) {
|
|
char *p = malloc(n);
|
|
memset(p, 0, n + /*null terminator*/1); // OOB: off by one
|
|
// Memory set function overflows the destination buffer
|
|
return p;
|
|
}
|
|
void memcpy1(void) {
|
|
char src[] = {1, 2, 3, 4};
|
|
char dst[10];
|
|
memcpy(dst, src, 5); // Memory copy function accesses out-of-bound array element
|
|
}
|
|
----
|
|
|
|
Typical bugs:
|
|
* Off-by-one bugs
|
|
|
|
Mitigations (extra mile):
|
|
|
|
* asan
|
|
* valgrind
|
|
* Use "bounded" alternative functions, such as `strncpy`.
|
|
* Compilation flags to harden the binary: FORTIFY_SOURCE, stack-canaries, ASLR
|
|
* fuzzing
|
|
|
|
Clarification question regarding inclusion of this link in https://discuss.sonarsource.com/t/layc-2023-languages-team-updates/14242/31?u=tomasz_kaminski[discuss].
|
|
|
|
Possibly missing CWEs from rspec:
|
|
* https://cwe.mitre.org/data/definitions/787.html
|
|
* https://cwe.mitre.org/data/definitions/193.html
|
|
|
|
=== is related to: S5782
|
|
|
|
endif::env-github,rspecator-view[]
|