wikid

A quick and simple CLI-program for downloading and rendering wikipedia pages in the terminal.
git clone git://git.noxz.tech/wikid
Log | Files | Refs | README | LICENSE

commit 3b2c4179078be7a4051a666153fb60f2f106435b
parent 88b6f190b56e8264b59c1b7a0ef440e15f3a2165
Author: z0noxz <chris@noxz.tech>
Date:   Sun, 26 Aug 2018 14:08:30 +0200

Initial commit

Diffstat:
A Makefile | 45 +++++++++++++++++++++++++++++++++++++++++++++
M README.md | 30 ++++++++++++++++++++++++++++--
A config.def.h | 24 ++++++++++++++++++++++++
A config.mk | 25 +++++++++++++++++++++++++
A term.c | 23 +++++++++++++++++++++++
A term.h | 1 +
A util.c | 217 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A util.h | 19 +++++++++++++++++++
A wikid.c | 390 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 files changed, 772 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
@@ -0,0 +1,45 @@
+.POSIX:
+
+include config.mk
+
+SRC = util.c term.c wikid.c
+OBJ = ${SRC:.c=.o}
+
+all: options wikid
+
+options:
+	@echo wikid build options:
+	@echo "CFLAGS = ${CFLAGS}"
+	@echo "LDFLAGS = ${LDFLAGS}"
+	@echo "CC = ${CC}"
+
+.c.o:
+	@echo CC $<
+	@${CC} ${CFLAGS} -c $<
+
+wikid.o: util.h term.h config.h
+
+${OBJ}: config.h config.mk
+
+config.h:
+	@echo creating $@ from config.def.h
+	@cp config.def.h $@
+
+wikid: ${OBJ}
+	@echo CC -o $@
+	@${CC} -g -o $@ ${OBJ} ${LDFLAGS}
+
+clean:
+	@echo cleaning
+	@rm -f wikid ${OBJ}
+	@rm -f config.h
+
+# Install below PREFIX (set in config.mk) instead of a hard-coded /usr/local;
+# DESTDIR is empty unless the caller sets it, e.g. for staged packaging.
+install: wikid
+	mkdir -p ${DESTDIR}${PREFIX}/bin
+	cp -f wikid ${DESTDIR}${PREFIX}/bin
+	chmod 755 ${DESTDIR}${PREFIX}/bin/wikid
+
+uninstall:
+	rm -f ${DESTDIR}${PREFIX}/bin/wikid
+
+.PHONY: all options clean install uninstall
diff --git a/README.md b/README.md
@@ -1,2 +1,28 @@
-# wikid
-wikid is a quick and simple CLI-program for downloading and rendering wikipedia pages in your terminal. And it's not **wikid** as in *wiki daemon*, but as in *wicked, Strikingly good, and effective*.
+wikid
+=====
+wikid is a quick and simple CLI-program for downloading and rendering wikipedia
+pages in your terminal. And it's not **wikid** as in *wiki daemon*, but as in
+*wicked, Strikingly good, and effective*.
+
+Requirements
+------------
+wikid uses the **curl** library to download the wiki pages, and **ncurses** for
+retrieving the terminal width for line output.
+
+Installation
+------------
+Edit config.mk to match your local setup (wikid is installed into the
+/usr/local namespace by default), then simply enter the following command to
+install (if necessary as root):
+
+    make clean install
+
+Example usage of wikid
+----------------------
+Read about wikipedia, using `less`
+
+    wikid wikipedia | less
+
+Read about wikipedia in german, using `less`
+
+    wikid -lde wikipedia | less
diff --git a/config.def.h b/config.def.h
@@ -0,0 +1,24 @@
+/* offset of the "xx" language placeholder inside `webpage` ("https://" is 8 bytes) */
+#define LANG_POS 8
+/* width, in columns, of one padding level of the rendered output */
+#define _PADDING 3
+
+/* two-letter language code that is copied over the placeholder in `webpage` */
+char language[] = "en";
+/* mkstemp(3) template of the file the response is downloaded into */
+char temp_file[] = "/tmp/__wikidtmp.XXXXXX";
+/* request URL; the subject is appended after "&titles=" */
+const char webpage[] = "https://xx.wikipedia.org/w/api.php"
+	"?format=json"
+	"&action=query"
+	"&prop=extracts"
+	"&exlimit=1"
+	"&explaintext"
+	"&redirects"
+	"&titles=";
+
+/* cleanup mappings for wikipedia responses
+ * NOTE: the replacement cannot be greater in size than the entity itself. If
+ * it is, it'll just be ignored.
+ */
+const Dictionary cleanup_mappings[] = {
+	/* entity replacement */
+	{ "\\n", "\n\n" },
+	{ "\\t", " " },
+	{ "\\\"", "\"" },
+};
diff --git a/config.mk b/config.mk
@@ -0,0 +1,25 @@
+# wikid version
+VERSION = 0.1.1
+
+# paths
+PREFIX = /usr/local
+MANPREFIX = ${PREFIX}/share/man
+
+# flags
+CPPFLAGS = -D_DEFAULT_SOURCE\
+	-D_BSD_SOURCE\
+	-D_POSIX_C_SOURCE=2\
+	-DVERSION=\"${VERSION}\"
+
+CFLAGS = -std=c99\
+	-Wall\
+	-Wno-deprecated-declarations\
+	-Os\
+	-pedantic\
+	${CPPFLAGS}
+
+LDFLAGS = -lcurl\
+	-lncurses
+
+# compiler and linker
+CC = cc
diff --git a/term.c b/term.c
@@ -0,0 +1,23 @@
+/* See LICENSE file for copyright and license details.
*/
+#include <stdlib.h>
+#include <term.h>
+
+#include "term.h"
+
+/*
+ * Look up the terminal width through the termcap interface.
+ *
+ * Loads the entry named by $TERM with tgetent() and returns its numeric "co"
+ * capability. Returns 0 when $TERM is unset, the entry cannot be loaded or
+ * the capability is not available.
+ */
+unsigned get_terminal_width(void)
+{
+	const char *term = getenv("TERM");
+	char buff[1024];
+	int cols;
+
+	if (!term)
+		return 0;
+
+	if (tgetent(buff, term) <= 0)
+		return 0;
+
+	cols = tgetnum("co");
+
+	/* reject every negative result so it is never converted to unsigned */
+	if (cols < 0)
+		return 0;
+
+	return cols;
+}
diff --git a/term.h b/term.h
@@ -0,0 +1 @@
+#ifndef WIKID_TERM_H
+#define WIKID_TERM_H
+
+/* width of the terminal in columns, or 0 when it cannot be determined */
+unsigned get_terminal_width(void);
+
+#endif /* WIKID_TERM_H */
diff --git a/util.c b/util.c
@@ -0,0 +1,217 @@
+/* See LICENSE file for copyright and license details. */
+#include <string.h>
+#include "util.h"
+
+/* highest code point UTF-8 is allowed to encode */
+#define UTF8_MAX 0x10fffful
+
+/*
+ * Encode the code point `u` as UTF-8 into `out` and NUL-terminate it.
+ *
+ * `out` must have room for 5 bytes. Surrogates (0xd800-0xdfff) and values
+ * above UTF8_MAX have no UTF-8 form and are written as '?'. Returns the
+ * number of bytes written, not counting the terminator.
+ */
+static int utf8_encode(unsigned long u, char *out)
+{
+	char *pt = out;
+
+	if (u <= 0x7f) {
+		*pt++ = (char)u;
+	} else if (u <= 0x7ff) {
+		*pt++ = (char)(0xc0 | (u >> 6));
+		*pt++ = (char)(0x80 | (u & 0x3f));
+	} else if (u >= 0xd800 && u <= 0xdfff) {
+		*pt++ = '?';
+	} else if (u <= 0xffff) {
+		*pt++ = (char)(0xe0 | (u >> 12));
+		*pt++ = (char)(0x80 | ((u >> 6) & 0x3f));
+		*pt++ = (char)(0x80 | (u & 0x3f));
+	} else if (u <= UTF8_MAX) {
+		*pt++ = (char)(0xf0 | (u >> 18));
+		*pt++ = (char)(0x80 | ((u >> 12) & 0x3f));
+		*pt++ = (char)(0x80 | ((u >> 6) & 0x3f));
+		*pt++ = (char)(0x80 | (u & 0x3f));
+	} else {
+		*pt++ = '?';
+	}
+
+	*pt = '\0';
+	return (int)(pt - out);
+}
+
+/*
+ * Parse exactly four hex digits at `s` into `*out`.
+ *
+ * Returns 1 on success and 0 otherwise. Parsing stops at the first byte that
+ * is not a hex digit, so the terminating NUL is never read past.
+ */
+static int parse_hex4(const char *s, unsigned long *out)
+{
+	unsigned long value = 0;
+	int digit;
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		digit = hex_value(s[i]);
+		if (digit < 0)
+			return 0;
+		value = (value << 4) | (unsigned long)digit;
+	}
+
+	*out = value;
+	return 1;
+}
+
+/* Replace every byte equal to `find` in `str` with `replace`. */
+void string_replace(char *str, const char find, const char replace)
+{
+	for (; *str; str++) {
+		if (*str == find)
+			*str = replace;
+	}
+}
+
+/*
+ * Remove every occurrence of the substring `rem` from `str`, in place.
+ * An empty `rem` leaves `str` unchanged.
+ */
+void string_remove(char *str, const char *rem)
+{
+	size_t length = strlen(rem);
+	char *hit = str;
+
+	if (length == 0)
+		return;
+
+	/* strstr also finds matches that follow a partial one ("aab" - "ab") */
+	while ((hit = strstr(hit, rem)) != NULL)
+		memmove(hit, hit + length, strlen(hit + length) + 1);
+}
+
+/* Strip leading and trailing spaces and newlines from `str`, in place. */
+void string_trim(char *str)
+{
+	char *start = str;
+	size_t length;
+
+	while (*start == ' ' || *start == '\n')
+		start++;
+
+	length = strlen(start);
+	while (length > 0
+	&& (start[length - 1] == ' ' || start[length - 1] == '\n'))
+		length--;
+
+	memmove(str, start, length);
+	str[length] = '\0';
+}
+
+/*
+ * Drop redundant spaces from `str`, in place: a space is removed when it is
+ * followed by a space or newline, or when the byte before it is a newline.
+ */
+void string_remove_redundent_spaces(char *str)
+{
+	const char *src;
+	char *dest = str;
+
+	for (src = str; *src != '\0'; src++) {
+		/* src[-1] is only inspected once src has moved off the first byte */
+		if (*src == ' '
+		&& (src[1] == ' '
+		|| src[1] == '\n'
+		|| (src > str && src[-1] == '\n')))
+			continue;
+		*dest++ = *src;
+	}
+	*dest = '\0';
+}
+
+/*
+ * Apply the first `size` entries of `dict` to `str`, in place and in order.
+ *
+ * Every occurrence of an entry's entity is overwritten with its replacement
+ * and the remainder of the string is moved up. Entries whose replacement is
+ * longer than the entity, or whose entity is empty, are skipped because the
+ * string can never grow.
+ */
+void dictionary_replace(char *str, const Dictionary *dict, int size)
+{
+	size_t ent_len;
+	size_t rep_len;
+	char *hit;
+	int i;
+
+	for (i = 0; i < size; i++) {
+		ent_len = strlen(dict[i].entity);
+		rep_len = strlen(dict[i].replacement);
+
+		if (ent_len == 0 || ent_len < rep_len)
+			continue;
+
+		hit = str;
+		while ((hit = strstr(hit, dict[i].entity)) != NULL) {
+			memcpy(hit, dict[i].replacement, rep_len);
+			/* continue behind the replacement so it is never rescanned */
+			hit += rep_len;
+			memmove(
+				hit,
+				hit + (ent_len - rep_len),
+				1 + strlen(hit + (ent_len - rep_len))
+			);
+		}
+	}
+}
+
+/*
+ * Integer power: returns `x` multiplied by itself `y` times, and 1 for y == 0.
+ * A negative `y` returns `x` unchanged; overflow is not detected.
+ */
+int tiny_pow(int x, int y)
+{
+	int z = x;
+
+	if (y == 0)
+		return 1;
+
+	while (y-- > 1)
+		z *= x;
+
+	return z;
+}
+
+/* Value (0-15) of the hex digit `c`, or -1 when `c` is not a hex digit. */
+int hex_value(char c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	if (c >= 'A' && c <= 'F')
+		return c - 'A' + 10;
+	if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+	return -1;
+}
+
+/*
+ * Convert the hex string in `ucs` (e.g. "20ac") to its UTF-8 encoding, in
+ * place. The encoding is never longer than the hex string, so it always fits.
+ *
+ * Input holding a non-hex byte or a value that cannot be encoded becomes "?".
+ * An empty string is left untouched, as there is no room to write into.
+ */
+void ucs_to_utf8(char *ucs)
+{
+	unsigned long u = 0;
+	const char *pt;
+	int digit;
+
+	if (*ucs == '\0')
+		return;
+
+	for (pt = ucs; *pt; pt++) {
+		digit = hex_value(*pt);
+		/* bail out before `u` can overflow or absorb a bogus digit */
+		if (digit < 0 || u > UTF8_MAX) {
+			u = UTF8_MAX + 1;
+			break;
+		}
+		u = (u << 4) | (unsigned long)digit;
+	}
+
+	utf8_encode(u, ucs);
+}
+
+/*
+ * Decode JSON style \uXXXX escapes in `str` to UTF-8, in place.
+ *
+ * A high surrogate directly followed by a low surrogate escape is combined
+ * into one code point. \u0000 is dropped so that it cannot truncate the
+ * string. A backslash followed by any other byte is copied together with
+ * that byte, which keeps an escaped backslash in front of a 'u' from being
+ * read as the start of an escape.
+ */
+void unicode_decode(char *str)
+{
+	const char *src = str;
+	char *dest = str;
+	unsigned long cp;
+	unsigned long low;
+
+	while (*src) {
+		if (*src != '\\') {
+			*dest++ = *src++;
+			continue;
+		}
+
+		if (src[1] == 'u' && parse_hex4(src + 2, &cp)) {
+			src += 6;
+
+			if (cp >= 0xd800 && cp <= 0xdbff
+			&& src[0] == '\\' && src[1] == 'u'
+			&& parse_hex4(src + 2, &low)
+			&& low >= 0xdc00 && low <= 0xdfff) {
+				cp = 0x10000 + ((cp - 0xd800) << 10) + (low - 0xdc00);
+				src += 6;
+			}
+
+			/* at most 4 bytes replace at least 6, so dest stays behind src */
+			if (cp != 0)
+				dest += utf8_encode(cp, dest);
+			continue;
+		}
+
+		*dest++ = *src++;
+		if (*src)
+			*dest++ = *src++;
+	}
+	*dest = '\0';
+}
diff --git a/util.h b/util.h
@@ -0,0 +1,19 @@
+#ifndef WIKID_UTIL_H
+#define WIKID_UTIL_H
+
+#include <stdbool.h>
+
+/* one cleanup rule: occurrences of `entity` are replaced by `replacement` */
+typedef struct {
+	const char *entity;
+	const char *replacement;
+} Dictionary;
+
+void string_replace(char *str, char find, char replace);
+void string_remove(char *str, const char *rem);
+void string_trim(char *str);
+void string_remove_redundent_spaces(char *str);
+void dictionary_replace(char *str, const Dictionary *dict, int size);
+int tiny_pow(int x, int y);
+int hex_value(char c);
+void ucs_to_utf8(char *ucs);
+void unicode_decode(char *str);
+
+#endif /* WIKID_UTIL_H */
diff --git a/wikid.c b/wikid.c
@@ -0,0 +1,390 @@
+/*
+ * Downloads and renders wikipedia pages in your terminal, with some formatting
+ * Copyright (C) 2018 z0noxz, <chris@noxz.tech>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <curl/curl.h>
+
+#include "util.h"
+#include "term.h"
+#include "config.h"
+
+/* macros */
+#define LENGTH(x) (sizeof(x) / sizeof((x)[0]))
+#define PADDING(x) ((x) * _PADDING)
+#define INDEX_OF(x, y) ((int)((x) - (y)))
+
+/* function declarations */
+char *get_wiki_url(char *subject);
+int download_wiki(char *url);
+void print_line(int padding, const char *format, char *line);
+void handle_line(char *line);
+int print_wiki(char *subject);
+
+/* variables */
+const char *usage = "usage: wikid [-hlrt] <subject>";
+unsigned total_line_count = 0;
+unsigned section_line_count = 0;
+bool blank_line = false;
+bool pre_heading = true;
+/* bit 0: raw output (-r), bit 1: text only output (-t) */
+char global_options = '\0';
+unsigned terminal_width;
+/* -1 until download_wiki() has created the temporary file */
+int temp_file_descriptor = -1;
+
+/* function implementations */
+
+/*
+ * Build the request URL for `subject`: `webpage` followed by the subject,
+ * with the two bytes of `language` written at LANG_POS and every space
+ * replaced by an underscore.
+ *
+ * Returns a malloc'ed string the caller has to free, or NULL when the
+ * allocation fails.
+ */
+char *get_wiki_url(char *subject)
+{
+	char *url = malloc(strlen(webpage) + strlen(subject) + 1);
+
+	if (url == NULL)
+		return NULL;
+
+	strcpy(url, webpage);
+	strcat(url, subject);
+
+	memmove(url + LANG_POS, language, 2);
+	string_replace(url, ' ', '_');
+
+	return url;
+}
+
+/*
+ * Download `url` into a fresh temporary file created from `temp_file`.
+ *
+ * The descriptor of that file is stored in `temp_file_descriptor` and stays
+ * open for print_wiki(); closing and unlinking it is left to the caller.
+ * Returns 0 (CURLE_OK) on success, the CURLcode of a failed transfer, or
+ * another non-zero value when the setup fails. Errors are reported on stderr.
+ */
+int download_wiki(char *url)
+{
+	int state = 0;
+	int fd;
+	char error[CURL_ERROR_SIZE];
+	FILE *fptr;
+	CURL *curl;
+	CURLcode status;
+
+	temp_file_descriptor = mkstemp(temp_file);
+	if (temp_file_descriptor == -1) {
+		fprintf(stderr, "%s\n", "Can't open tmp file");
+		return CURLE_FAILED_INIT;
+	}
+
+	/* write through a duplicate so fclose() keeps the original open */
+	fd = dup(temp_file_descriptor);
+	fptr = fd == -1 ? NULL : fdopen(fd, "w");
+	if (fptr == NULL) {
+		if (fd != -1)
+			close(fd);
+		fprintf(stderr, "%s\n", "Can't open tmp file");
+		return CURLE_FAILED_INIT;
+	}
+
+	curl_global_init(CURL_GLOBAL_ALL);
+	curl = curl_easy_init();
+
+	if (curl == NULL) {
+		fprintf(stderr, "%s\n", "Failed to initialize curl");
+		state = -1;
+	} else {
+		error[0] = '\0';
+
+		if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error) != CURLE_OK) {
+			fprintf(stderr, "%s\n", "Failed to set an error buffer for curl");
+			state = -1;
+		} else if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK) {
+			fprintf(stderr, "%s\n", "Failed to set the URL for curl");
+			state = -1;
+		} else if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) != CURLE_OK) {
+			fprintf(stderr, "%s\n", "Failed to set write function for curl");
+			state = -1;
+		} else if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, fptr) != CURLE_OK) {
+			fprintf(stderr, "%s\n", "Failed to set output for curl");
+			state = -1;
+		/* the option takes a long; a plain int is undefined in varargs */
+		} else if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != CURLE_OK) {
+			fprintf(stderr, "%s\n", "Failed to set follow location for curl");
+			state = -1;
+		} else if ((status = curl_easy_perform(curl)) != CURLE_OK) {
+			/* prefer the detailed message when curl filled the buffer */
+			fprintf(stderr, "%s\n", error[0] ? error : curl_easy_strerror(status));
+			state = status;
+		}
+
+		curl_easy_cleanup(curl);
+	}
+
+	curl_global_cleanup();
+
+	/* fclose() flushes, so a failure here means the file is incomplete */
+	if (fclose(fptr) != 0 && state == 0) {
+		fprintf(stderr, "%s\n", "Failed to write tmp file");
+		state = -1;
+	}
+
+	return state;
+}
+
+/*
+ * Print `line` on stdout behind `padding` padding levels.
+ *
+ * `format` must contain exactly one %s conversion. It is used unless text
+ * only output (-t) is active, in which case the line is printed plain.
+ */
+void print_line(int padding, const char *format, char *line)
+{
+	if (padding > 0)
+		fprintf(stdout, "%*c", PADDING(padding), ' ');
+
+	if (!((global_options >> 1) & 1U))
+		fprintf(stdout, format, line);
+	else
+		fprintf(stdout, "%s\n", line);
+}
+
+/*
+ * Render one line of the extract.
+ *
+ * Lines starting with "==", "===" or "====" are printed as headings and have
+ * their markers removed; "==" headings are upper-cased as well. Lines are
+ * broken at spaces once they exceed the terminal width minus three padding
+ * levels, counted in bytes. Empty lines are skipped at the start of a section
+ * and directly after another empty line. `input` itself is not modified.
+ */
+void handle_line(char *input)
+{
+	char *temp_line = malloc(strlen(input) + 1);
+	char *ptr;
+	char *line;
+	char *next;
+	unsigned char cur;
+	char heading = 0x0;
+	int b = 0; /* beginning */
+	int e = 0; /* end */
+	int pos;
+	int limit = (int)terminal_width - PADDING(3);
+
+	if (temp_line == NULL)
+		return;
+
+	strcpy(temp_line, input);
+	string_trim(temp_line);
+
+	/* make sure first section line contains text */
+	if (!*temp_line && !(section_line_count > 0 && !blank_line)) {
+		free(temp_line);
+		return;
+	}
+
+	/* check and handle headings, (h2-h4) */
+	if (temp_line[0] == '=' && temp_line[1] == '=' && temp_line[2]) {
+		if (temp_line[2] == '=' && temp_line[3] == '=') {
+			string_remove(temp_line, "====");
+			heading = 0x4;
+		} else if (temp_line[2] == '=') {
+			string_remove(temp_line, "===");
+			heading = 0x3;
+		} else {
+			/* capitalize 'lowest' heading */
+			for (ptr = temp_line + 2; *ptr; ptr++) {
+				cur = (unsigned char)*ptr;
+				if (cur >= 'a' && cur <= 'z') {
+					*ptr = (char)(cur - 0x20);
+				/* two byte letters C3 A0-BE, except the division sign C3 B7 */
+				} else if ((unsigned char)*(ptr - 1) == 0xc3
+				&& cur >= 0xa0 && cur <= 0xbe && cur != 0xb7) {
+					*ptr = (char)(cur - 0x20);
+				}
+			}
+			string_remove(temp_line, "==");
+			heading = 0x2;
+		}
+
+		pre_heading = false;
+		/* wraps around, so the increment below restarts the count at 0 */
+		section_line_count = (unsigned)-1;
+	}
+
+	/* prevent lines from breaking in the middle of words */
+	if (limit > 0) {
+		for (ptr = temp_line; *ptr; ptr++) {
+			pos = INDEX_OF(ptr, temp_line);
+
+			if (*ptr == ' ')
+				e = pos;
+
+			if (pos - b >= limit) {
+				if (e - b > 0)
+					temp_line[e] = '\n';
+				b = e + 1;
+			}
+		}
+	}
+
+	string_trim(temp_line);
+	line = temp_line;
+
+	/* format and output lines */
+	while (line) {
+		next = strchr(line, '\n');
+		if (next)
+			*next = '\0';
+
+		if (pre_heading) {
+			print_line(2, "\033[3m%s\033[0m\n", line);
+		} else if (heading) {
+			switch (heading) {
+			case 0x2:
+				print_line(0, "\033[1m%s\033[0m\n", line);
+				break;
+			case 0x3:
+				print_line(1, "\033[1m%s\033[0m\n", line);
+				break;
+			default:
+				print_line(2, "\033[1m\033[3m%s\033[0m\n", line);
+				break;
+			}
+		} else {
+			print_line(2, "%s\n", line);
+		}
+
+		if (next)
+			*next = '\n';
+		line = next ? next + 1 : NULL;
+	}
+
+	blank_line = *temp_line == '\0';
+	total_line_count++;
+	section_line_count++;
+
+	free(temp_line);
+}
+
+/*
+ * Read the downloaded response back from the temporary file, cut out the
+ * value of its "extract" member and print it.
+ *
+ * With raw output (-r) the cleaned text is printed as is, otherwise it is
+ * decoded and rendered line by line below a title made of `subject`.
+ * Returns 1 on success and 0 when the file cannot be read or holds no
+ * extract.
+ */
+int print_wiki(char *subject)
+{
+	const char beginning[] = "\"extract\":\"";
+	int success = 0;
+	int fd;
+	FILE *fptr;
+	char *content = NULL;
+	char *text;
+	char *end;
+	char *line;
+	char *next;
+	long size;
+	size_t length;
+
+	fd = dup(temp_file_descriptor);
+	if (fd == -1)
+		return 0;
+
+	fptr = fdopen(fd, "rb");
+	if (fptr == NULL) {
+		close(fd);
+		return 0;
+	}
+
+	if (fseek(fptr, 0, SEEK_END) == 0 && (size = ftell(fptr)) >= 0) {
+		rewind(fptr);
+		content = malloc((size_t)size + 1);
+		if (content != NULL) {
+			length = fread(content, sizeof(char), (size_t)size, fptr);
+			/* the string functions below need a terminator */
+			content[length] = '\0';
+		}
+	}
+	fclose(fptr);
+
+	if (content == NULL)
+		return 0;
+
+	text = strstr(content, beginning);
+	if (text == NULL)
+		goto out;
+	text += strlen(beginning);
+
+	/* the value ends at the first quote that is not escaped */
+	for (end = text; *end && *end != '"'; end++) {
+		if (*end == '\\' && end[1] != '\0')
+			end++;
+	}
+	if (*end != '"')
+		goto out;
+	*end = '\0';
+
+	dictionary_replace(text, cleanup_mappings, LENGTH(cleanup_mappings));
+
+	if ((global_options >> 0) & 1U) {
+		fprintf(stdout, "%s\n", text);
+	} else {
+		string_remove_redundent_spaces(text);
+		unicode_decode(text);
+
+		if (!((global_options >> 1) & 1U))
+			fprintf(stdout, "\033[4m%s:\033[0m\n\n", subject);
+
+		line = text;
+		while (line) {
+			next = strchr(line, '\n');
+			if (next)
+				*next = '\0';
+
+			handle_line(line);
+
+			if (next)
+				*next = '\n';
+			line = next ? next + 1 : NULL;
+		}
+	}
+
+	success = 1;
+
+out:
+	free(content);
+	return success;
+}
+
+/*
+ * Parse the options, pick the subject, then download and print the page.
+ *
+ * The subject is the first line of stdin when stdin is not a terminal and
+ * has data, otherwise the last command line argument. Returns 0 on success
+ * and 1 on any failure.
+ */
+int main(int argc, char *argv[])
+{
+	int state = 0;
+	char *input = NULL;
+	char *url = NULL;
+	const char *subject = NULL;
+	char buffer[80];
+	size_t input_size;
+	int c; /* getopt() returns an int; -1 does not fit an unsigned char */
+
+	while ((c = getopt(argc, argv, "hl:rt")) != -1) {
+		switch (c) {
+		case 'l':
+			/* two bytes are copied, so a shorter code would copy its NUL */
+			if (strlen(optarg) < 2) {
+				fprintf(stderr, "%s\n", usage);
+				state = 1;
+			} else {
+				memmove(language, optarg, 2);
+			}
+			break;
+		case 'r':
+			global_options |= 1U << 0;
+			break;
+		case 't':
+			global_options |= 1U << 1;
+			break;
+		case 'h':
+			fprintf(stderr, "%s\n", usage);
+			fprintf(stderr, "Finds a wiki subject and prints it.\n");
+			fprintf(stderr, " -h Print this help text and exit\n");
+			fprintf(stderr, " -l CODE Language code in ISO 639-1 format\n");
+			fprintf(stderr, " -r Print wiki in raw format\n");
+			fprintf(stderr, " -t Print wiki in text only format\n");
+			return 0;
+		default:
+			fprintf(stderr, "%s\n", usage);
+			state = 1;
+		}
+	}
+
+	if (state == 0) {
+		/* validate input from STDIN */
+		if (!isatty(fileno(stdin))
+		&& fgets(buffer, sizeof(buffer), stdin) != NULL) {
+			/* fgets() keeps the line break, which must not reach the URL */
+			buffer[strcspn(buffer, "\r\n")] = '\0';
+			subject = buffer;
+
+		/* validate input from CLI; options alone are not a subject */
+		} else if (optind < argc) {
+			subject = argv[argc - 1];
+		}
+
+		/* no valid input, so show usage */
+		if (subject == NULL || *subject == '\0') {
+			fprintf(stderr, "%s\n", usage);
+			state = 1;
+		}
+	}
+
+	if (state == 0) {
+		input_size = strlen(subject) + 1;
+		input = malloc(input_size);
+		if (input == NULL) {
+			fprintf(stderr, "%s\n", "Out of memory");
+			state = 1;
+		} else {
+			memcpy(input, subject, input_size);
+		}
+	}
+
+	/* initialization is done, so proceed with the rest of the program... */
+	terminal_width = get_terminal_width();
+
+	if (state == 0) {
+		url = get_wiki_url(input);
+		if (url == NULL) {
+			fprintf(stderr, "%s\n", "Out of memory");
+			state = 1;
+		}
+	}
+
+	if (state == 0 && download_wiki(url) != CURLE_OK) {
+		fprintf(stderr, "%s\n", "Failed to download the wiki");
+		state = 1;
+	}
+
+	if (state == 0 && !print_wiki(input)) {
+		fprintf(stderr, "%s\n", "Failed to print the wiki");
+		state = 1;
+	}
+
+	if (temp_file_descriptor != -1) {
+		close(temp_file_descriptor);
+		unlink(temp_file);
+	}
+
+	free(input);
+	free(url);
+
+	return state;
+}