summary refs log tree commit diff
path: root/urltool.c
diff options
context:
space:
mode:
Diffstat (limited to 'urltool.c')
-rw-r--r-- urltool.c 309
1 files changed, 309 insertions, 0 deletions
diff --git a/urltool.c b/urltool.c
new file mode 100644
index 0000000..c0687c6
--- /dev/null
+++ b/urltool.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright (C) 2025 dwlr <dweller@cabin.digital>
+ *
+ * BSD 3-Clause License (BSD-3-Clause)
+ * See LICENSE for details
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+
+typedef signed char s8; /* signed byte; underlying type of the local bool */
+typedef unsigned char u8; /* unsigned byte; used to print raw bytes as hex */
+typedef s8 bool; /* file-local boolean type (no <stdbool.h> is included) */
+
+#define true 1 /* truth value stored in the local bool */
+#define false 0 /* false value stored in the local bool */
+
+
+/*
+ * Make a heap-allocated copy of the NUL-terminated string `s`.
+ *
+ * Returns NULL when `s` is NULL or when the allocation fails; otherwise the
+ * caller owns the returned buffer and releases it with free().
+ */
+char* cstr_dup(const char* s)
+{
+    size_t length;
+    char* copy;
+
+    if(s == NULL) return NULL;
+
+    length = strlen(s);
+
+    /* calloc zero-fills, so the byte after the copied text is already '\0' */
+    copy = calloc(length + 1, 1);
+    if(copy != NULL) memcpy(copy, s, length);
+
+    return copy;
+}
+
+/*
+ * Return a newly allocated copy of the final component of `path`.
+ *
+ * Any number of trailing '/' characters is ignored, so "a/b", "a/b/" and
+ * "a/b//" all yield "b". A path made up only of slashes yields "/", and an
+ * empty path yields an empty string.
+ *
+ * Returns NULL when `path` is NULL or when the allocation fails. The caller
+ * owns the result and releases it with free().
+ */
+char* cstr_basename(const char* path)
+{
+    size_t start = 0;
+    size_t end = 0;
+    size_t len = 0;
+    char* r = NULL;
+
+    if(!path) return NULL;
+
+    end = strlen(path);
+
+    /* drop every trailing slash, but keep one byte so "/" survives as "/" */
+    while(end > 1 && path[end - 1] == '/') end--;
+
+    if(end == 1 && path[0] == '/')
+    {
+        /* the path consisted of slashes only */
+        start = 0;
+    }
+    else
+    {
+        /* walk back to just after the previous slash (or to the beginning) */
+        start = end;
+        while(start > 0 && path[start - 1] != '/') start--;
+    }
+
+    len = end - start;
+
+    r = malloc(len + 1);
+    if(!r) return NULL;
+
+    memcpy(r, path + start, len);
+    r[len] = '\0';
+
+    return r;
+}
+
+/* True for the characters that are written to the output unescaped. */
+static int uri_is_unreserved(unsigned char c)
+{
+    return (c >= '0' && c <= '9')
+        || (c >= 'A' && c <= 'Z')
+        || (c >= 'a' && c <= 'z')
+        || c == '-' || c == '.' || c == '_' || c == '~';
+}
+
+/*
+ * Percent-encode the `size` bytes at `s`, which are expected to be UTF-8,
+ * and write the result to stdout.
+ *
+ * ASCII letters, digits and "-._~" are copied through, a space becomes '+',
+ * and every other byte is written as "%xx" (lowercase hex). Multi-byte
+ * sequences are validated (lead byte, length, continuation bytes) before any
+ * of their bytes are written.
+ *
+ * Returns the number of characters (code points) encoded, or -1 on a
+ * malformed or truncated sequence. Output produced for the characters before
+ * the offending sequence has already been written when -1 is returned.
+ */
+ssize_t utf8_to_uri(const char* s, size_t size)
+{
+    ssize_t len = 0;
+    size_t i = 0;
+
+    while(i < size)
+    {
+        unsigned char lead = (unsigned char)s[i];
+        size_t n = 0;
+        size_t k = 0;
+
+        if(lead < 0x80)
+        {
+            if(uri_is_unreserved(lead)) putchar(lead);
+            else if(lead == ' ') putchar('+');
+            else printf("%%%02x", (unsigned)lead);
+
+            i++;
+            len++;
+            continue;
+        }
+
+        /* sequence length from the lead byte; 10xxxxxx and 11111xxx are invalid */
+        if((lead & 0xE0) == 0xC0) n = 2;
+        else if((lead & 0xF0) == 0xE0) n = 3;
+        else if((lead & 0xF8) == 0xF0) n = 4;
+        else return -1;
+
+        /* the whole sequence must fit; one ending exactly at `size` is fine */
+        if(size - i < n) return -1;
+
+        for(k = 1; k < n; k++)
+        {
+            if(((unsigned char)s[i + k] & 0xC0) != 0x80) return -1;
+        }
+
+        for(k = 0; k < n; k++)
+        {
+            printf("%%%02x", (unsigned)(unsigned char)s[i + k]);
+        }
+
+        i += n;
+        len++;
+    }
+
+    return len;
+}
+
+/*
+ * Map an ASCII uppercase letter to its lowercase form.
+ * Every other character is returned unchanged.
+ */
+char ascii_lower(char c)
+{
+    const int is_upper = (c >= 'A' && c <= 'Z');
+
+    return is_upper ? (char)(c + ('a' - 'A')) : c;
+}
+
+/*
+ * Lowercase the ASCII letters of the NUL-terminated string `s` in place.
+ * Returns `s` itself.
+ */
+char* asciiz_lower(char* s)
+{
+    char* cursor = s;
+
+    while(*cursor != '\0')
+    {
+        *cursor = ascii_lower(*cursor);
+        cursor++;
+    }
+
+    return s;
+}
+
+/*
+ * Result of an operation that may or may not produce a character:
+ * `c` carries the value and is meaningful only when `ok` is true.
+ */
+typedef struct maybe_char
+{
+    bool ok;
+    char c;
+} maybe_char;
+
+/*
+ * Decode the two hexadecimal digits at `hex` (either case) into one byte.
+ *
+ * The first digit supplies the high nibble and the second the low nibble.
+ * `ok` is true only when both characters are valid hex digits.
+ */
+maybe_char hex_to_char(const char hex[2])
+{
+    maybe_char result = {0};
+    const char* digit = hex;
+    int shift;
+
+    /* shift is 4 for the first digit (high nibble), 0 for the second */
+    for(shift = 4; shift >= 0; shift -= 4, digit++)
+    {
+        char lowered = ascii_lower(*digit);
+        char nibble = 0;
+
+        if(lowered >= '0' && lowered <= '9') nibble = lowered - '0';
+        else if(lowered >= 'a' && lowered <= 'f') nibble = lowered - 'a' + 10;
+        else return result;
+
+        result.c |= nibble << shift;
+    }
+
+    result.ok = true;
+    return result;
+}
+
+/*
+ * Decode the `size` bytes of percent-encoded text at `s` and write the
+ * result to stdout.
+ *
+ * '+' becomes a space, "%XY" becomes the byte with hex value XY, and every
+ * other byte is copied through unchanged.
+ *
+ * Returns the number of output bytes written, or -1 when a '%' is not
+ * followed by two hex digits inside the first `size` bytes. Output for the
+ * input before the offending escape has already been written when -1 is
+ * returned.
+ */
+ssize_t uri_to_utf8(const char* s, size_t size)
+{
+    ssize_t len = 0;
+    size_t i = 0;
+
+    while(i < size)
+    {
+        if(s[i] == '+') putchar(' ');
+        else if(s[i] == '%')
+        {
+            maybe_char c = {0};
+
+            /* both hex digits (s[i+1], s[i+2]) must lie inside the input */
+            if(size - i < 3) return -1;
+
+            c = hex_to_char(s + i + 1);
+            if(!c.ok) return -1;
+
+            putchar(c.c);
+            i += 2; /* skip the two digits; the '%' is skipped below */
+        }
+        else putchar(s[i]);
+
+        i++;
+        len++;
+    }
+
+    return len;
+}
+
+
+/*
+ * Print the help text to stderr, using `name` as the program name,
+ * then terminate the process with exit status 1. Does not return.
+ */
+void usage(const char* name)
+{
+    FILE* out = stderr;
+
+    fprintf(out, "%s - RFC3986 URI encoder/decoder\n", name);
+    fputs(" 2025 (C) dweller@cabin.digital\n"
+          " All Wrongs Reserved\n\n", out);
+
+    fprintf(out, "Usage: %s e|d [files...]\n", name);
+    fputs(" e - encode URI\n"
+          " d - decode URI\n"
+          " files - optional list of files to read and encode/decode.\n"
+          " If empty stdin is used\n", out);
+
+    exit(1);
+}
+
+/*
+ * Read `f` line by line and pass each line, without its terminating
+ * newline, to `func`; the converted line is followed by '\n' on stdout.
+ *
+ *  f       - stream to read; its error/EOF indicators are cleared first
+ *  name    - label used in diagnostics
+ *  context - when true, every output line is prefixed with a tab
+ *  func    - converter called as func(line, length); a negative return
+ *            is reported on stderr as malformed input
+ *
+ * A read error is reported on stderr. The stream is not closed.
+ */
+void do_file(FILE* f, const char* name, bool context, ssize_t(*func)(const char*,size_t))
+{
+    char* line = NULL;
+    size_t cap = 0;
+    ssize_t nread = 0;
+    unsigned long lineno = 0;
+    int read_errno = 0;
+
+    clearerr(f);
+
+    for(;;)
+    {
+        size_t n = 0;
+
+        errno = 0;
+        nread = getline(&line, &cap, f);
+        if(nread == -1)
+        {
+            /* keep errno from the failing call; later calls may overwrite it */
+            read_errno = errno;
+            break;
+        }
+
+        lineno++;
+        n = (size_t)nread;
+
+        /* the last line of a file may lack '\n'; strip it only when present */
+        if(n > 0 && line[n - 1] == '\n') n--;
+
+        if(context) putchar('\t');
+
+        if(func(line, n) < 0)
+            fprintf(stderr, "ERROR: malformed input in '%s' at line %lu\n", name, lineno);
+
+        putchar('\n');
+    }
+
+    free(line);
+
+    if(ferror(f)) fprintf(stderr, "ERROR: failed to read '%s': %s\n", name, strerror(read_errno));
+}
+
+/*
+ * Entry point.
+ *
+ * When invoked under the name "urlenc" or "urldec", every command-line
+ * argument is encoded or decoded and printed on its own line. Under any other
+ * name the first argument selects the mode ("e" or "d") and the remaining
+ * arguments are files to process; with no files, stdin is processed.
+ *
+ * Returns 0 on success, 1 when the program name cannot be determined, a file
+ * cannot be opened, or a command-line argument is malformed.
+ */
+int main(int argc, char** argv)
+{
+    int r = 0;
+    int i = 0;
+    ssize_t (*func)(const char*, size_t) = NULL;
+    /* argv[0] may be missing when argc is 0; cstr_basename(NULL) gives NULL */
+    char* base = cstr_basename(argc > 0 ? argv[0] : NULL);
+
+    if(!base)
+    {
+        fprintf(stderr, "ERROR: could not determine program name\n");
+        return 1;
+    }
+
+    if(strcmp(base, "urlenc") == 0) func = utf8_to_uri;
+    else if(strcmp(base, "urldec") == 0) func = uri_to_utf8;
+
+    if(func)
+    {
+        /* multi-call mode: convert the arguments themselves */
+        for(i = 1; i < argc; i++)
+        {
+            if(func(argv[i], strlen(argv[i])) < 0)
+            {
+                fprintf(stderr, "ERROR: malformed input in argument %d\n", i);
+                r = 1;
+            }
+            putchar('\n');
+        }
+    }
+    else
+    {
+        if(argc < 2) usage(base);
+
+        if(strcmp(argv[1], "e") == 0) func = utf8_to_uri;
+        else if(strcmp(argv[1], "d") == 0) func = uri_to_utf8;
+        else usage(base);
+
+        if(argc < 3)
+        {
+            do_file(stdin, "stdin", false, func);
+        }
+        else for(i = 2; i < argc; i++)
+        {
+            FILE* f = NULL;
+            const char* name = argv[i];
+            /* label and indent the output only when several files are given */
+            bool context = argc > 3;
+
+            errno = 0;
+            f = fopen(name, "r");
+            if(!f)
+            {
+                fprintf(stderr, "ERROR: could not open '%s': %s\n", name, strerror(errno));
+                r = 1;
+                continue;
+            }
+
+            if(context) printf("%s:\n", name);
+            do_file(f, name, context, func);
+
+            fclose(f);
+        }
+    }
+
+    free(base);
+    return r;
+}
+