summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore3
-rw-r--r--LICENSE30
-rw-r--r--README38
-rwxr-xr-xbuild-musl.sh6
-rwxr-xr-xbuild.sh28
-rwxr-xr-xclean.sh3
-rwxr-xr-xexamples/search.sh6
-rwxr-xr-xinstall.sh9
-rw-r--r--urltool.c309
9 files changed, 432 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ad0cbf4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+urldec
+urlenc
+urltool
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..774fd60
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,30 @@
+BSD 3-Clause License (BSD-3-Clause)
+
+Copyright (C) 2025 dwlr <dweller@cabin.digital>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/README b/README
new file mode 100644
index 0000000..dc89f94
--- /dev/null
+++ b/README
@@ -0,0 +1,38 @@
+URLTOOL
+=======
+
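+urltool is a small command-line tool that percent-encodes and decodes text
+using the URI encoding described by RFC 3986. Spaces are written as `+`
+(as in HTML form encoding) rather than `%20`.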
+
+* `urltool` reads lines from files or stdin and writes each one, encoded or
+  decoded, to stdout.
+* `urlenc` and `urldec` are symlinks to `urltool` that encode/decode their
+  command-line arguments instead, printing one output line per argument.
+
+
+## Build & Install
+
+Only tested on Linux, but should be POSIX compliant. The only dependency is
+libc with getline(3).
+
+Just run `./build.sh` to build "normally", you can set $CC and $CFLAGS from env.
+If you have `musl-gcc` installed and prefer static executables like me, you can
+run `./build-musl.sh` to do so.
+
+By default a debug build with debug information is created. Run
+`./build.sh fast` to build with optimizations on. Assumes GCC-like flags.
+
+`./clean.sh` to remove the build artifacts.
+
+`./install.sh` to install to "${DESTDIR}${PREFIX}/${BINDIR}"
+
+The scripts are tiny so feel free to edit.
+
+
+## Why?
+
+To help me build shell pipelines that do HTTP GET requests and the like.
+See examples/.
+
+
+## Who?
+dweller from cabin.digital
diff --git a/build-musl.sh b/build-musl.sh
new file mode 100755
index 0000000..f373832
--- /dev/null
+++ b/build-musl.sh
@@ -0,0 +1,6 @@
+#!/bin/sh -e
+#
+# Static build against musl: preset the compiler and the link mode, then hand
+# over to the regular build script. build.sh is sourced rather than executed,
+# so it sees these variables and this script's arguments.
+
+STATIC=-static
+CC=musl-gcc
+
+. ./build.sh
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..88b193b
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,28 @@
+#!/bin/sh -e
+#
+# Builds urltool and creates the urldec/urlenc symlinks next to it.
+#
+# Usage: ./build.sh [help|fast|debug]      (default: debug)
+#
+# Environment:
+#   CC       - compiler to use (default: gcc)
+#   CFLAGS   - extra flags, appended after the built-in ones
+#   STATIC   - extra link-mode flag such as -static
+#   OPTFLAGS - replaces the optimization/debug flags of either mode
+
+CC=${CC:-gcc}
+STATIC=${STATIC:-}
+DEBUGFLAGS=${OPTFLAGS:-"-Og -g"}
+FASTFLAGS=${OPTFLAGS:-"-O3 -flto -fwhole-program -ffunction-sections -fdata-sections -Wl,--gc-sections"}
+
+# Keep whatever the caller put in $CFLAGS; it is appended last on the compiler
+# command line so it can override the defaults below.
+USER_CFLAGS=${CFLAGS:-}
+
+CFLAGS=" \
+    -Wall -Wextra -Wpedantic -Wshadow \
+    -Wno-long-long \
+    -std=c89 \
+    $STATIC \
+    -D_DEFAULT_SOURCE -D_POSIX_C_SOURCE=200809L \
+"
+
+case "$1" in
+    "help")
+        >&2 echo "Usage: $0 [help|fast|debug]"
+        >&2 echo " fast/debug - add optimization or debug flags"
+        exit 1
+        ;;
+    "fast")    CFLAGS="$CFLAGS $FASTFLAGS" ;;
+    "debug"|*) CFLAGS="$CFLAGS $DEBUGFLAGS" ;;
+esac
+
+# The flag variables are intentionally unquoted: each holds several
+# space-separated options that must be split into separate arguments.
+$CC $CFLAGS $USER_CFLAGS urltool.c -o urltool
+
+# Both links live beside their target, so a plain relative symlink is enough
+# and avoids the GNU-only `ln -r`.
+ln -sf urltool urldec
+ln -sf urltool urlenc
diff --git a/clean.sh b/clean.sh
new file mode 100755
index 0000000..130804c
--- /dev/null
+++ b/clean.sh
@@ -0,0 +1,3 @@
+#!/bin/sh -e
+
+# Remove the build artifacts. -f makes a missing file a non-error, so cleaning
+# an already clean (or partially built) tree still exits successfully.
+rm -f urltool urldec urlenc
diff --git a/examples/search.sh b/examples/search.sh
new file mode 100755
index 0000000..ab4cca2
--- /dev/null
+++ b/examples/search.sh
@@ -0,0 +1,6 @@
+#!/bin/sh -e
+
+# Search DuckDuckGo's HTML frontend for the words given as arguments.
+# Usage: search.sh WORD...
+
+BROWSER=${BROWSER:-w3m}
+URL="https://html.duckduckgo.com/html/?q="
+
+# "$*" joins all arguments into a single string, so the whole query is encoded
+# as one line. With "$@" urlenc prints one line per word, which would put raw
+# newlines into the URL.
+# $BROWSER is left unquoted so it may carry its own options.
+$BROWSER "${URL}$(urlenc "$*")"
diff --git a/install.sh b/install.sh
new file mode 100755
index 0000000..5167c8d
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,9 @@
+#!/bin/sh -e
+#
+# Installs urltool into "${DESTDIR}${PREFIX}/${BINDIR}" and creates the
+# urldec/urlenc symlinks beside it.
+
+PREFIX=${PREFIX:-/usr/local}
+BINDIR=${BINDIR:-bin}
+
+target_dir="${DESTDIR}${PREFIX}/${BINDIR}"
+
+install -d "$target_dir"
+install -sm 755 urltool "$target_dir"
+
+# -r turns the absolute paths given here into links relative to target_dir.
+for link in urldec urlenc; do
+    ln -rsf "$target_dir/urltool" "$target_dir/$link"
+done
diff --git a/urltool.c b/urltool.c
new file mode 100644
index 0000000..c0687c6
--- /dev/null
+++ b/urltool.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright (C) 2025 dwlr <dweller@cabin.digital>
+ *
+ * BSD 3-Clause License (BSD-3-Clause)
+ * See LICENSE for details
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+
+/* Short aliases for the signed and unsigned byte types. */
+typedef signed char s8;
+typedef unsigned char u8;
+/* One-byte boolean; the build script compiles with -std=c89, which has no
+ * <stdbool.h>. */
+typedef s8 bool;
+
+/* Values for `bool`. */
+#define true 1
+#define false 0
+
+
+/*
+ * Returns a freshly allocated copy of the NUL-terminated string `s`, or NULL
+ * when `s` is NULL or the allocation fails. The caller frees the result.
+ */
+char* cstr_dup(const char* s)
+{
+    size_t bytes;
+    char*  copy;
+
+    if(s == NULL) return NULL;
+
+    /* Count the terminator too, so one memcpy moves the whole string. */
+    bytes = strlen(s) + 1;
+    copy  = malloc(bytes);
+    if(copy == NULL) return NULL;
+
+    memcpy(copy, s, bytes);
+    return copy;
+}
+
+/*
+ * Returns a newly allocated copy of the last component of `path`.
+ *
+ * Trailing slashes are ignored ("a/b/" and "a/b//" both give "b"), a path
+ * made only of slashes gives "/", and an empty path gives "".
+ *
+ * Returns NULL when `path` is NULL or the allocation fails.
+ * The caller frees the result.
+ */
+char* cstr_basename(const char* path)
+{
+    const char* end   = NULL;   /* one past the last byte of the component */
+    const char* start = NULL;   /* first byte of the component */
+    size_t      len   = 0;
+    char*       r     = NULL;
+
+    if(!path) return NULL;
+
+    /* Ignore any run of trailing slashes. */
+    end = path + strlen(path);
+    while(end > path && end[-1] == '/') end--;
+
+    if(end == path)
+    {
+        /* Nothing but slashes is the root; an empty path stays empty. */
+        start = path;
+        len   = (*path == '/') ? 1 : 0;
+    }
+    else
+    {
+        /* Walk back to the slash that precedes the component, if any. */
+        start = end;
+        while(start > path && start[-1] != '/') start--;
+        len = (size_t)(end - start);
+    }
+
+    /* calloc zero-fills, which also provides the terminator. */
+    r = calloc(len + 1, 1);
+    if(!r) return NULL;
+
+    return memcpy(r, start, len);
+}
+
+/*
+ * Writes the URI-encoded form of the `size` bytes at `s` to stdout.
+ *
+ * ASCII letters, digits and "-._~" are copied unchanged, a space becomes '+',
+ * and every other byte is written as a lowercase "%xx" escape. Multi-byte
+ * sequences are escaped byte by byte; their continuation bytes are not
+ * validated.
+ *
+ * Returns the number of characters (not bytes) consumed, or -1 when a lead
+ * byte is invalid or a multi-byte sequence runs past the end of the input.
+ * Output written before the error is not rolled back.
+ */
+ssize_t utf8_to_uri(const char* s, size_t size)
+{
+    ssize_t len = 0;
+    size_t  i   = 0;
+
+    while(i < size)
+    {
+        const unsigned char lead = (unsigned char)s[i];
+        size_t n = 0;   /* bytes in the current character */
+        size_t k = 0;
+
+        if(lead < 0x80)
+        {
+            if((lead >= '0' && lead <= '9')
+            || (lead >= 'A' && lead <= 'Z')
+            || (lead >= 'a' && lead <= 'z')
+            || (lead == '-')
+            || (lead == '.')
+            || (lead == '_')
+            || (lead == '~'))
+                putchar(lead);
+            else if(lead == ' ')
+                putchar('+');
+            else
+                printf("%%%02x", lead);
+
+            n = 1;
+        }
+        else
+        {
+            /* Longest prefix first: the 0xF0 pattern also matches the
+             * 0xE0 and 0xC0 masks. */
+            if((lead & 0xF0) == 0xF0)      n = 4;
+            else if((lead & 0xE0) == 0xE0) n = 3;
+            else if((lead & 0xC0) == 0xC0) n = 2;
+            else return -1;   /* stray continuation byte */
+
+            /* A sequence ending exactly at the end of the input is complete;
+             * only one that needs bytes beyond it is truncated. */
+            if(n > size - i) return -1;
+
+            for(k = 0; k < n; k++)
+                printf("%%%02x", (unsigned char)s[i + k]);
+        }
+
+        i += n;
+        len++;
+    }
+
+    return len;
+}
+
+/* Maps 'A'..'Z' to 'a'..'z'; any other character is returned unchanged. */
+char ascii_lower(char c)
+{
+    const int is_upper = (c >= 'A' && c <= 'Z');
+
+    return is_upper ? (char)(c + ('a' - 'A')) : c;
+}
+
+/*
+ * Lowercases the NUL-terminated string `s` in place, one character at a time
+ * through ascii_lower(), and returns `s`.
+ */
+char* asciiz_lower(char* s)
+{
+    size_t i = 0;
+
+    while(s[i] != '\0')
+    {
+        s[i] = ascii_lower(s[i]);
+        i++;
+    }
+
+    return s;
+}
+
+/* A char together with a flag telling whether it holds a valid result. */
+typedef struct maybe_char
+{
+    bool ok;   /* true when `c` was produced successfully */
+    char c;
+} maybe_char;
+
+/*
+ * Converts the two hexadecimal digits hex[0] and hex[1] (either case) into
+ * the byte they spell. hex[0] is the high nibble.
+ *
+ * On success `ok` is true and `c` holds the byte; if either character is not
+ * a hex digit, `ok` is false.
+ */
+maybe_char hex_to_char(const char hex[2])
+{
+    maybe_char out = {0};
+    int pos;
+
+    for(pos = 0; pos < 2; pos++)
+    {
+        const char digit  = ascii_lower(hex[pos]);
+        int        nibble = 0;
+
+        if(digit >= '0' && digit <= '9')      nibble = digit - '0';
+        else if(digit >= 'a' && digit <= 'f') nibble = digit - 'a' + 10;
+        else return out;
+
+        /* First digit fills bits 4..7, second digit bits 0..3. */
+        out.c |= nibble << (pos == 0 ? 4 : 0);
+    }
+
+    out.ok = true;
+    return out;
+}
+
+/*
+ * Writes the decoded form of the `size` bytes of URI-encoded text at `s` to
+ * stdout: '+' becomes a space, "%xx" becomes the byte it names, and every
+ * other byte is copied unchanged.
+ *
+ * Returns the number of bytes written, or -1 when a '%' is not followed by
+ * two hexadecimal digits inside the input. Output written before the error
+ * is not rolled back.
+ */
+ssize_t uri_to_utf8(const char* s, size_t size)
+{
+    ssize_t len = 0;
+    size_t  i   = 0;
+
+    while(i < size)
+    {
+        if(s[i] == '+') putchar(' ');
+        else if(s[i] == '%')
+        {
+            maybe_char c = {0};
+
+            /* Both hex digits (s[i+1] and s[i+2]) must lie within `size`;
+             * never read past the length the caller gave us. */
+            if(size - i < 3) return -1;
+
+            c = hex_to_char(s + i + 1);
+            if(!c.ok) return -1;
+
+            putchar(c.c);
+            i += 2;   /* skip the two digits; the '%' is skipped below */
+        }
+        else putchar(s[i]);
+
+        i++;
+        len++;
+    }
+
+    return len;
+}
+
+
+/*
+ * Prints the help text to stderr, using `name` as the program name, and
+ * terminates the process with exit status 1. Does not return.
+ */
+void usage(const char* name)
+{
+    fprintf(stderr, "%s - RFC3986 URI encoder/decoder\n", name);
+    fputs(" 2025 (C) dweller@cabin.digital\n"
+          " All Wrongs Reserved\n\n", stderr);
+
+    fprintf(stderr, "Usage: %s e|d [files...]\n", name);
+    fputs(" e - encode URI\n"
+          " d - decode URI\n"
+          " files - optional list of files to read and encode/decode.\n"
+          " If empty stdin is used\n", stderr);
+
+    exit(1);
+}
+
+/*
+ * Reads `f` line by line and passes each line, without its newline, to
+ * `func`, which writes the converted text to stdout. A newline is written
+ * after every line.
+ *
+ *   f       - stream to read; its error/EOF flags are cleared first
+ *   name    - name used for `f` in error messages
+ *   context - when true, every output line is prefixed with a tab
+ *   func    - converter; a negative return value means the line was invalid
+ *
+ * Invalid lines and read errors are reported on stderr; processing continues
+ * with the next line after an invalid one.
+ */
+void do_file(FILE* f, const char* name, bool context, ssize_t(*func)(const char*,size_t))
+{
+    char*         line   = NULL;
+    size_t        cap    = 0;
+    ssize_t       nread  = 0;
+    unsigned long lineno = 0;
+    int           err    = 0;
+
+    clearerr(f);
+
+    while((nread = getline(&line, &cap, f)) != -1)
+    {
+        size_t n = (size_t)nread;
+
+        lineno++;
+
+        /* getline keeps the newline, but the last line of a file may not
+         * have one; only strip it when it is really there. */
+        if(n > 0 && line[n - 1] == '\n') n--;
+
+        if(context) putchar('\t');
+        if(func(line, n) < 0)
+            fprintf(stderr, "ERROR: invalid input in '%s', line %lu\n", name, lineno);
+        putchar('\n');
+    }
+
+    /* Save errno before free() gets a chance to overwrite it. */
+    err = errno;
+    free(line);
+
+    if(ferror(f)) fprintf(stderr, "ERROR: failed to read '%s': %s\n", name, strerror(err));
+}
+
+/*
+ * Entry point. The behaviour depends on the name the program was started as:
+ *
+ *   urlenc ARG...          encode each argument, one output line per argument
+ *   urldec ARG...          decode each argument, one output line per argument
+ *   <other> e|d [FILE...]  encode/decode the lines of the files, or of stdin
+ *                          when no file is given; with several files each
+ *                          one is introduced by a "name:" header line
+ *
+ * Returns 0 on success and 1 when a file could not be opened or an argument
+ * could not be converted.
+ */
+int main(int argc, char** argv)
+{
+    int r = 0;
+    int i;
+    ssize_t (*convert)(const char*, size_t) = NULL;
+    char* base = cstr_basename(argc > 0 ? argv[0] : NULL);
+    /* cstr_basename yields NULL without argv[0] or when out of memory. */
+    const char* prog = base ? base : "urltool";
+
+    if(strcmp(prog, "urlenc") == 0)      convert = utf8_to_uri;
+    else if(strcmp(prog, "urldec") == 0) convert = uri_to_utf8;
+
+    if(convert)
+    {
+        /* Symlink mode: the arguments themselves are the input. */
+        for(i = 1; i < argc; i++)
+        {
+            if(convert(argv[i], strlen(argv[i])) < 0)
+            {
+                fprintf(stderr, "ERROR: invalid input in argument %d\n", i);
+                r = 1;
+            }
+            putchar('\n');
+        }
+    }
+    else
+    {
+        if(argc < 2) usage(prog);
+
+        if(strcmp(argv[1], "e") == 0)      convert = utf8_to_uri;
+        else if(strcmp(argv[1], "d") == 0) convert = uri_to_utf8;
+        else usage(prog);
+
+        if(argc < 3) do_file(stdin, "stdin", false, convert);
+        else for(i = 2; i < argc; i++)
+        {
+            FILE* f = NULL;
+            const char* name = argv[i];
+            /* Headers and indentation are only useful with several files. */
+            bool context = argc > 3;
+
+            errno = 0;
+            f = fopen(name, "r");
+            if(!f)
+            {
+                fprintf(stderr, "ERROR: could not open '%s': %s\n", name, strerror(errno));
+                r = 1;
+                continue;
+            }
+
+            if(context) printf("%s:\n", name);
+            do_file(f, name, context, convert);
+
+            fclose(f);
+        }
+    }
+
+    free(base);
+    return r;
+}
+