STC csview: Non-zero terminated String View

April 29, 2026 · View on GitHub

String

The type csview is a non-zero terminated and utf8-iterable string view. It refers to a constant contiguous sequence of char-elements with the first element of the sequence at position zero. The implementation holds two members: a pointer to constant char and a size. See zsview for a zero-terminated string view/span type.

csview never allocates memory, and therefore need not be destructed. Its lifetime is limited by the source string storage. It keeps the length of the string, which reduces the need to call strlen() in usage.

  • csview iterators work on UTF8 codepoints - like cstr and zsview (see Example 2).
  • Because it is not zero-terminated, it must be printed the following way:
csview sv = c_sv("Hello world");   // view over a string literal; no allocation
sv = csview_subview(sv, 0, 5);     // narrow the view to the first 5 bytes
// The view is not zero-terminated, so "%s" cannot be used; c_svfmt/c_svarg
// pass the length together with the pointer.
printf(c_svfmt "\n", c_svarg(sv)); // "Hello"

See the c++ class std::basic_string_view for a functional description.

Header file

All csview definitions and prototypes are available by including a single header file.

#include <stc/cstr.h>
#include <stc/csview.h> // after cstr.h: include extra cstr-csview functions

Methods

csview          c_sv(const char literal_only[]);                        // from string literal only
csview          c_sv(const char* str, isize_t n);                       // from a const char* and length n
csview          csview_from(const char* str);                           // from const char* str
csview          csview_from_n(const char* str, isize_t n);              // alias for c_sv(str, n)

isize_t         csview_size(csview sv);
bool            csview_is_empty(csview sv);
void            csview_clear(csview* self);

bool            csview_equals(csview sv, const char* str);
isize_t         csview_find(csview sv, const char* str);
isize_t         csview_find_sv(csview sv, csview find);
bool            csview_contains(csview sv, const char* str);
bool            csview_starts_with(csview sv, const char* str);
bool            csview_ends_with(csview sv, const char* str);

csview          csview_subview(csview sv, isize_t pos, isize_t len);     // view of len bytes starting at byte pos
csview          csview_subview_pro(csview sv, isize_t pos, isize_t len); // a negative pos counts from the end
csview          csview_slice(csview sv, isize_t pos1, isize_t pos2);
csview          csview_tail(csview sv, isize_t len);                    // span of the trailing len bytes
csview          csview_trim(csview sv);                                 // trim whitespace and ctrl-chars on both ends
csview          csview_trim_start(csview sv);                           // trim from start of view
csview          csview_trim_end(csview sv);                             // trim from end of view

                // split by separator string or UTF8 character. *pos is start search position and is auto updated:
csview          csview_split(csview sv, const char* sep, isize_t* pos); // *pos becomes c_NPOS after last token.
csview          csview_strtok(csview sv, const char* delims, isize_t* pos); // split by any single UTF8-character in delims.

UTF8 methods

csview          csview_u8_from(const char* str, isize_t u8pos, isize_t u8len); // construct csview with u8len runes
isize_t         csview_u8_size(csview sv);                              // number of utf8 runes
csview_iter     csview_u8_at(csview sv, isize_t u8pos);                 // get rune at rune position
csview          csview_u8_subview(csview sv, isize_t u8pos, isize_t u8len); // utf8 span
csview          csview_u8_tail(csview sv, isize_t u8len);               // span of the trailing u8len runes.
bool            csview_u8_valid(csview sv);                             // check utf8 validity of sv

bool            csview_iequals(csview sv, const char* str);             // utf8 case-insensitive comparison
bool            csview_istarts_with(csview sv, const char* str);        // utf8 case-insensitive
bool            csview_iends_with(csview sv, const char* str);          // utf8 case-insensitive

csview_iter     csview_begin(const csview* self);                       // utf8 iteration
csview_iter     csview_end(const csview* self);
void            csview_next(csview_iter* it);                           // next utf8 codepoint
csview_iter     csview_advance(csview_iter it, isize_t u8pos);          // advance +/- codepoints
uint32_t        csview_codepoint(const csview_iter* it);                // return cached codepoint for iter

Iterate tokens with c_each_split_sv() and c_each_strtok_sv()

Iterate tokens in an input string split by a separator string:

  • for (c_each_split_sv(it, const char* separator, csview input)) ...;
  • it.token is a csview of the current token.

Iterate tokens in an input string split by any of the UTF8 characters in delimiters:

  • for (c_each_strtok_sv(it, const char* delimiters, csview input)) ...;
  • it.token is a csview of the current token.

There are also c_each_split() and c_each_strtok(), which take a const char* as input.

// Split a zero-terminated input on the separator ", "; it.token is a csview
// of the current token, so it is printed with an explicit length.
for (c_each_split(it, ", ", "hello, one, two, three"))
    printf("'%.*s' ", c_svarg(it.token));

// 'hello' 'one' 'two' 'three'

Helper methods

size_t          csview_hash(const csview* x);
int             csview_cmp(const csview* x, const csview* y);
bool            csview_eq(const csview* x, const csview* y);
int             csview_icmp(const csview* x, const csview* y);
bool            csview_ieq(const csview* x, const csview* y);

Types

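| Type name    | Type definition                           | Used to represent...    |
|:-------------|:------------------------------------------|:------------------------|
| csview       | struct { const char *buf; isize_t size; } | The string view type    |
| csview_value | const char                                | The string element type |
| csview_iter  | union { csview_value *ref; csview chr; }  | UTF8 iterator           |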

Constants and macros

| Name        | Value           | Usage                                     |
|:------------|:----------------|:------------------------------------------|
| c_svarg(sv) | printf argument | printf("sv: " c_svfmt "\n", c_svarg(sv)); |

Example

#include <stc/cstr.h>
#include <stc/csview.h>


int main(void)
{
    cstr str = cstr_lit("We think in generalities, but we live in details.");
    csview sv = cstr_sv(&str);
    csview sv1 = csview_subview(sv, 3, 5);                 // "think"
    isize_t pos = csview_find(sv, "live");                 // position of "live"
    csview sv2 = csview_subview(sv, pos, 4);               // "live"
    csview sv3 = csview_subview_pro(sv, -8, 7);            // "details" (negative pos counts from the end)
    printf(c_svfmt ", " c_svfmt ", " c_svfmt "\n",
           c_svarg(sv1), c_svarg(sv2), c_svarg(sv3));

    cstr_assign(&str, "apples are green or red");
    // A view is only valid as long as its source storage is; str was just
    // reassigned, so take a fresh view before using it again.
    sv = cstr_sv(&str);
    cstr s2 = cstr_from_sv(csview_subview_pro(sv, -3, 3)); // "red"
    cstr s3 = cstr_from_sv(csview_subview(sv, 0, 6));      // "apples"
    // Print the two owned copies so the second line of the output is produced.
    printf(c_svfmt " " c_svfmt "\n",
           c_svarg(cstr_sv(&s2)), c_svarg(cstr_sv(&s3)));  // "red apples"

    c_drop(cstr, &str, &s2, &s3);
}

Output:

think, live, details
red apples

Example 2: UTF8 handling

#include <stc/cstr.h>

int main(void)
{
    cstr s1 = cstr_lit("hell😀 w😀rld");

    cstr_u8_replace(&s1, 7, 1, "ø");
    printf("%s\n", cstr_str(&s1));

    for (c_each(i, cstr, s1))
        printf(c_svfmt ",", c_svarg(i.chr));

    cstr_drop(&s1);
}

Output:

hell😀 wørld
h,e,l,l,😀, ,w,ø,r,l,d,

Example 3: csview tokenizer (string split)

Splits strings into tokens. print_split() makes no memory allocations or strlen() calls, and does not depend on zero-terminated strings. The string_split() function returns a stack (vector) of cstr.

#include <stdio.h>
#include <stc/csview.h>

// Print every token of `input`, split on the separator string `sep`,
// one per line in square brackets, followed by an empty line.
void print_split(csview input, const char* sep)
{
    for (c_each_split_sv(it, sep, input)) {
        // it.token is a csview, so it is printed via c_svfmt/c_svarg.
        printf("[" c_svfmt "]\n", c_svarg(it.token));
    }
    puts("");
}
#include <stc/cstr.h>
#define i_pro_key cstr
#include <stc/stack.h>

// Split `input` on the separator string `sep` and return the tokens as a
// stack of cstr, each built from its token with cstr_from_sv().
// The caller releases the result with stack_cstr_drop().
stack_cstr string_split(csview input, const char* sep)
{
    stack_cstr tokens = {0};

    for (c_each_split_sv(it, sep, input)) {
        stack_cstr_push(&tokens, cstr_from_sv(it.token));
    }

    return tokens;
}

int main(void)
{
    print_split(c_sv("//This is a//double-slash//separated//string"), "//");
    print_split(c_sv("This has no matching separator"), "xx");

    stack_cstr s = string_split(c_sv("Split,this,,string,now,"), ",");

    for (c_each(i, stack_cstr, s))
        printf("[%s]\n", cstr_str(i.ref));
    puts("");

    stack_cstr_drop(&s);
}

Output:

[]
[This is a]
[double-slash]
[separated]
[string]

[This has no matching separator]

[Split]
[this]
[]
[string]
[now]
[]