/*
 * Copyright (C) 2025 dwlr
 *
 * BSD 3-Clause License (BSD-3-Clause)
 * See LICENSE for details
 */

/*
 * urltool.c - percent-encode / percent-decode text, line by line.
 *
 * Origin: commit 449c078661046665ba0cc87a0f632085a77df8a5 ("initial commit"),
 * dweller, Thu, 16 Jan 2025 17:18:45 +0200.
 *
 * Invocation:
 *   urlenc ARG...          encode every argument, one result per line
 *   urldec ARG...          decode every argument, one result per line
 *   <other name> e|d [files...]
 *                          encode/decode the listed files (or stdin)
 *
 * Define URLTOOL_NO_MAIN to compile the helpers without main(), e.g. to link
 * them into a test harness.
 */

/*
 * getline() and ssize_t are POSIX.1-2008, not ISO C.  Request them explicitly
 * so the file also builds with strict -std=c99 / -std=c11.
 */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

#include <sys/types.h>

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


typedef signed char   s8;
typedef unsigned char u8;

/* Signature shared by utf8_to_uri() and uri_to_utf8(). */
typedef ssize_t (*convert_fn)(const char*, size_t);


/*
 * Duplicate the NUL-terminated string s.
 *
 * Returns a heap copy the caller must free(), or NULL when s is NULL or the
 * allocation fails.
 */
char* cstr_dup(const char* s)
{
    size_t sz;
    char*  r;

    if(!s) return NULL;

    sz = strlen(s);
    r  = malloc(sz + 1);
    if(!r) return NULL;

    memcpy(r, s, sz + 1);   /* + 1 copies the terminator as well */
    return r;
}

/*
 * Return the last component of path as a new heap string.
 *
 * Trailing slashes are ignored ("a/b//" -> "b"), a path made only of slashes
 * yields "/", and an empty path yields an empty string.
 *
 * Returns NULL when path is NULL or the allocation fails; otherwise the
 * caller must free() the result.
 */
char* cstr_basename(const char* path)
{
    size_t start;
    size_t end;
    char*  r;

    if(!path) return NULL;

    /* Drop trailing slashes, but keep a lone leading "/" */
    end = strlen(path);
    while(end > 1 && path[end - 1] == '/') end--;

    /* Walk back to just after the previous slash (or the string start) */
    start = end;
    while(start > 0 && path[start - 1] != '/') start--;

    /* Nothing but "/" is left: that is the result */
    if(start == end && end > 0) start = 0;

    r = malloc(end - start + 1);
    if(!r) return NULL;

    memcpy(r, path + start, end - start);
    r[end - start] = '\0';

    return r;
}

/* True for the characters that are written to the output unescaped. */
static bool is_unreserved(u8 c)
{
    return (c >= '0' && c <= '9')
        || (c >= 'A' && c <= 'Z')
        || (c >= 'a' && c <= 'z')
        || c == '-' || c == '.' || c == '_' || c == '~';
}

/*
 * Number of bytes in the sequence introduced by lead, judged by its high
 * bits; 0 when lead cannot start a sequence (a continuation byte).
 */
static size_t utf8_seq_len(u8 lead)
{
    if(!(lead & 0x80))         return 1;
    if((lead & 0xF0) == 0xF0)  return 4;
    if((lead & 0xE0) == 0xE0)  return 3;
    if((lead & 0xC0) == 0xC0)  return 2;

    return 0;
}

/*
 * Percent-encode the first size bytes of s and write the result to stdout.
 *
 * Unreserved ASCII is copied, a space becomes '+', every other byte becomes
 * "%xx" (lowercase hex).  Multi-byte sequences are escaped byte by byte.
 *
 * Returns the number of characters (not bytes) consumed, or -1 when a
 * sequence is cut short by the end of the buffer or starts with a
 * continuation byte.  Output produced before the error has already been
 * written.
 */
ssize_t utf8_to_uri(const char* s, size_t size)
{
    ssize_t len = 0;
    size_t  i   = 0;

    if(!s && size) return -1;

    while(i < size)
    {
        u8     lead = (u8)s[i];
        size_t n    = utf8_seq_len(lead);
        size_t k;

        /* The whole sequence must fit in what is left of the buffer;
         * a sequence that ends exactly at the buffer end is fine. */
        if(n == 0 || n > size - i) return -1;

        if(n == 1 && is_unreserved(lead)) putchar(lead);
        else if(lead == ' ')              putchar('+');
        else
        {
            for(k = 0; k < n; k++)
                printf("%%%02x", (unsigned)(u8)s[i + k]);
        }

        i += n;
        len++;
    }

    return len;
}

/* Lower-case c if it is an ASCII capital letter; other values pass through. */
char ascii_lower(char c)
{
    if(c >= 'A' && c <= 'Z') return c + ('a' - 'A');
    else                     return c;
}

/* Lower-case the NUL-terminated string s in place and return it. */
char* asciiz_lower(char* s)
{
    char* c;

    if(!s) return NULL;
    for(c = s; *c; c++) *c = ascii_lower(*c);

    return s;
}

/* Result of hex_to_char(): c is meaningful only when ok is true. */
typedef struct maybe_char maybe_char;
struct maybe_char
{
    bool ok;
    char c;
};

/*
 * Convert two hex digits (either case) into the byte they spell.
 *
 * Both hex[0] and hex[1] are read.  On a non-hex digit the result has
 * ok == false.
 */
maybe_char hex_to_char(const char hex[2])
{
    maybe_char ret   = {0};
    unsigned   value = 0;
    int        i;

    for(i = 0; i < 2; i++)
    {
        char     c = ascii_lower(hex[i]);
        unsigned nibble;

        if(c >= '0' && c <= '9')      nibble = (unsigned)(c - '0');
        else if(c >= 'a' && c <= 'f') nibble = (unsigned)(c - 'a') + 10;
        else return ret;

        value = (value << 4) | nibble;
    }

    ret.c  = (char)(u8)value;
    ret.ok = true;
    return ret;
}

/*
 * Percent-decode the first size bytes of s and write the result to stdout.
 *
 * '+' becomes a space, "%xx" becomes the byte 0xxx, everything else is
 * copied.
 *
 * Returns the number of bytes written, or -1 when an escape is incomplete
 * or contains a non-hex digit.  Output produced before the error has already
 * been written.
 */
ssize_t uri_to_utf8(const char* s, size_t size)
{
    ssize_t len = 0;
    size_t  i   = 0;

    if(!s && size) return -1;

    while(i < size)
    {
        if(s[i] == '+') putchar(' ');
        else if(s[i] == '%')
        {
            maybe_char c;

            /* '%' and both hex digits must lie inside the buffer */
            if(size - i < 3) return -1;

            c = hex_to_char(s + i + 1);
            if(!c.ok) return -1;

            putchar((u8)c.c);
            i += 2;
        }
        else putchar((u8)s[i]);

        i++;
        len++;
    }

    return len;
}


/* Print the help text to stderr and terminate with exit status 1. */
void usage(const char* name)
{
    fprintf(stderr, "%s - RFC3986 URI encoder/decoder\n"
                    " 2025 (C) dweller@cabin.digital\n"
                    " All Wrongs Reserved\n\n"
                    "Usage: %s e|d [files...]\n"
                    " e - encode URI\n"
                    " d - decode URI\n"
                    " files - optional list of files to read and encode/decode.\n"
                    " If empty stdin is used\n",
                    name, name);

    exit(1);
}

/*
 * Run func over every line of f, writing one output line per input line.
 * With context set, each output line is prefixed with a tab.
 *
 * Returns true when every line converted and the stream was read without
 * error; problems are reported on stderr using name.
 */
static bool convert_stream(FILE* f, const char* name, bool context, convert_fn func)
{
    char*   line = NULL;
    size_t  cap  = 0;
    ssize_t got  = 0;
    bool    ok   = true;
    int     err  = 0;

    if(!f || !func) return false;

    clearerr(f);
    errno = 0;

    while((got = getline(&line, &cap, f)) != -1)
    {
        size_t n = (size_t)got;

        /* Strip the newline only if there is one: the final line of a
         * file may end without it. */
        if(n > 0 && line[n - 1] == '\n') n--;

        if(context) putchar('\t');
        if(func(line, n) < 0)
        {
            fprintf(stderr, "ERROR: malformed input in '%s'\n", name);
            ok = false;
        }
        putchar('\n');
    }

    err = errno;    /* keep getline's errno before anything can clobber it */
    free(line);

    if(ferror(f))
    {
        fprintf(stderr, "ERROR: failed to read '%s': %s\n", name, strerror(err));
        ok = false;
    }

    return ok;
}

/*
 * Convert f line by line with func, writing to stdout.
 *
 * name is used in diagnostics only.  With context set, each output line is
 * prefixed with a tab.  Errors are reported on stderr.
 */
void do_file(FILE* f, const char* name, bool context, ssize_t(*func)(const char*,size_t))
{
    (void)convert_stream(f, name, context, func);
}

#ifndef URLTOOL_NO_MAIN

/*
 * Run func over argv[1..argc-1], one output line per argument.
 * Returns the exit status: 0, or 1 if any argument was malformed.
 */
static int convert_args(int argc, char** argv, convert_fn func)
{
    int r = 0;
    int i;

    for(i = 1; i < argc; i++)
    {
        if(func(argv[i], strlen(argv[i])) < 0)
        {
            fprintf(stderr, "ERROR: malformed input in argument %d\n", i);
            r = 1;
        }
        putchar('\n');
    }

    return r;
}

int main(int argc, char** argv)
{
    int         r    = 0;
    int         i;
    char*       base = cstr_basename(argc > 0 ? argv[0] : NULL);
    /* argv[0] may be missing and the copy may fail: fall back to a fixed name */
    const char* prog = base ? base : "urltool";

    if(strcmp(prog, "urlenc") == 0)      r = convert_args(argc, argv, utf8_to_uri);
    else if(strcmp(prog, "urldec") == 0) r = convert_args(argc, argv, uri_to_utf8);
    else
    {
        convert_fn func = NULL;

        if(argc < 2) usage(prog);

        if(strcmp(argv[1], "e") == 0)      func = utf8_to_uri;
        else if(strcmp(argv[1], "d") == 0) func = uri_to_utf8;
        else usage(prog);

        if(argc < 3)
        {
            if(!convert_stream(stdin, "stdin", false, func)) r = 1;
        }
        else for(i = 2; i < argc; i++)
        {
            const char* name    = argv[i];
            bool        context = argc > 3;  /* several files: label each one */
            FILE*       f       = NULL;

            errno = 0;
            f = fopen(name, "r");
            if(!f)
            {
                fprintf(stderr, "ERROR: could not open '%s': %s\n", name, strerror(errno));
                r = 1;
                continue;
            }

            if(context) printf("%s:\n", name);
            if(!convert_stream(f, name, context, func)) r = 1;

            fclose(f);
        }
    }

    free(base);
    return r;
}

#endif /* URLTOOL_NO_MAIN */