From 8ca26cbb43f8ff2b0b5a2c49d8e11a2f4dd54b79 Mon Sep 17 00:00:00 2001 From: Johannes Stoelp Date: Sun, 18 Feb 2024 22:04:41 +0100 Subject: nginx: self-hosted freq used docs /pub/ --- roles/webserver/files/user_conf.d/memzero.conf | 6 ++ roles/webserver/files/www/memzero/pub/.gen.sh | 99 +++++++++++++++++++ roles/webserver/files/www/memzero/pub/.gitignore | 2 + .../memzero/pub/abi/sysv/sysv-gabi4-2013/.fetch.sh | 24 +++++ .../files/www/memzero/pub/references.html | 108 +++++++++++++++++++++ roles/webserver/files/www/memzero/robots.txt | 2 + 6 files changed, 241 insertions(+) create mode 100644 roles/webserver/files/www/memzero/pub/.gen.sh create mode 100644 roles/webserver/files/www/memzero/pub/.gitignore create mode 100644 roles/webserver/files/www/memzero/pub/abi/sysv/sysv-gabi4-2013/.fetch.sh create mode 100644 roles/webserver/files/www/memzero/pub/references.html create mode 100644 roles/webserver/files/www/memzero/robots.txt (limited to 'roles/webserver/files') diff --git a/roles/webserver/files/user_conf.d/memzero.conf b/roles/webserver/files/user_conf.d/memzero.conf index ed3bd9d..2da3a73 100644 --- a/roles/webserver/files/user_conf.d/memzero.conf +++ b/roles/webserver/files/user_conf.d/memzero.conf @@ -10,6 +10,12 @@ server { include /etc/nginx/inc/ssl.conf; root /www/memzero; + + location /pub/ { + autoindex on; + # https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag + add_header X-Robots-Tag "noindex, nofollow, nosnippet, noarchive"; + } } server { diff --git a/roles/webserver/files/www/memzero/pub/.gen.sh b/roles/webserver/files/www/memzero/pub/.gen.sh new file mode 100644 index 0000000..9a8030e --- /dev/null +++ b/roles/webserver/files/www/memzero/pub/.gen.sh @@ -0,0 +1,99 @@ +#!/bin/bash + +OUT=references.html + +cat <<EOF > $OUT + + + + + + references + + + + +

Self-hosted collection of frequently referenced documents and specifications. This page provides links to the original sources.

+

filter:

+ + + +EOF diff --git a/roles/webserver/files/www/memzero/pub/.gitignore b/roles/webserver/files/www/memzero/pub/.gitignore new file mode 100644 index 0000000..ca51e62 --- /dev/null +++ b/roles/webserver/files/www/memzero/pub/.gitignore @@ -0,0 +1,2 @@ +*.pdf +abi/sysv/sysv-gabi4-2013/*.html diff --git a/roles/webserver/files/www/memzero/pub/abi/sysv/sysv-gabi4-2013/.fetch.sh b/roles/webserver/files/www/memzero/pub/abi/sysv/sysv-gabi4-2013/.fetch.sh new file mode 100644 index 0000000..0ff7818 --- /dev/null +++ b/roles/webserver/files/www/memzero/pub/abi/sysv/sysv-gabi4-2013/.fetch.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +rec_fetch() { + echo fetching $1 + curl --no-progress-meter https://www.sco.com/developers/gabi/latest/$1 -o $1 + + # Handle following link tags: + # + # + # + # + # + for page in $(grep href $1 |\ + sed 's/.*href="\{0,1\}\([a-zA-Z._0-9]*\)"\{0,1\}[#>].*/\1/g' |\ + sort | uniq); do + if [[ -z $page || -f $page || ${page##*.} != html ]]; then + continue; + fi + + rec_fetch $page + done +} + +rec_fetch contents.html diff --git a/roles/webserver/files/www/memzero/pub/references.html b/roles/webserver/files/www/memzero/pub/references.html new file mode 100644 index 0000000..1f7c71e --- /dev/null +++ b/roles/webserver/files/www/memzero/pub/references.html @@ -0,0 +1,108 @@ + + + + + + references + + + + +

Self-hosted collection of frequently referenced documents and specifications. This page provides links to the original sources.

+

filter:

+
+ + diff --git a/roles/webserver/files/www/memzero/robots.txt b/roles/webserver/files/www/memzero/robots.txt new file mode 100644 index 0000000..44b0e56 --- /dev/null +++ b/roles/webserver/files/www/memzero/robots.txt @@ -0,0 +1,2 @@ +User-agent: * +Disallow: /pub/ -- cgit v1.2.3