scripts/gen-sitemap.bash (6 lines of code) (raw):

# One-off generator for a sitemap that includes all published articles.
#
# Lists the objects under PROD/articles/ in the manage-help-content bucket
# (using the "membership" AWS profile), rewrites each object key into its
# help-centre URL and writes the result, one URL per line, to sitemap.txt in
# the current directory.
#
# Requires: aws CLI, jq.

# Abort if any stage fails (e.g. expired credentials) instead of quietly
# leaving an empty or partial sitemap.txt behind with a zero exit status.
set -euo pipefail

# Pipeline notes:
#   * No --page-size: the AWS CLI follows pagination itself and S3 returns at
#     most 1000 keys per request, so a larger page size does not change the
#     output.
#   * jq -r prints the raw key (no surrounding JSON quotes), so the rewrites
#     below can be anchored to the start and end of each line.
#   * tail -n +2 drops the first key returned.
#     NOTE(review): presumably that is the "PROD/articles/" folder placeholder
#     object -- confirm; if no such object exists this skips a real article.
#   * The ".json" suffix is matched literally and only at the end of the key,
#     so keys that merely end in "<any char>json" are left intact.
aws s3api list-objects-v2 \
    --profile membership \
    --bucket manage-help-content \
    --prefix PROD/articles/ \
  | jq -r '.Contents[].Key' \
  | tail -n +2 \
  | sed \
      -e 's|^PROD/articles|https://manage.theguardian.com/help-centre/article|' \
      -e 's|\.json$||' \
  > sitemap.txt