This question sounds easy, but it is not as simple as it sounds.
Brief summary of what\'s wrong
For an example, use this board; http://pinterest
#!/usr/bin/env bash
##
## File: getpins.bsh
##
## Copyrighted by +A.M.Danischewski 2016+ (c)
## This program may be reutilized without limits, provided this
## notice remain intact.
## If this breaks one day, then just fire up firefox Developer Tools and check the network traffic to
## capture "copy as curl" of the calls to the search page (filter with BaseSearchResource), then the
## call to feed more data (filter with SearchResource).
##
## Do a search on whatever you want remove the cookie header, and add -o ret2.html -D h2.txt -c c1.txt,
## then search replace the search terms as SEARCHTOKEN1 and SEARCHTOKEN2.
##
## Description this script facilitates alternate browsers, by caching images/pins
## from pinterest. This script is hardwired for two search terms. First create a directory
## to where you want the images to go, then cd there.
## Usage:
## $> cd /big/drive/auto_gyros
## $> getpins.bsh "sleek autogyros"
##
## Expect around 900 images to land wherever you select, so make sure you have space! =)
##
declare -r ORIG_IMGS="pin_orig_imgs.txt"
declare -r TMP_IMGS="pin_imgs.txt"
declare -r UA_HEADER="User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:19.$(($RANDOM%10))) Gecko/20100101 Firefox/19.0"
## Say Hello to the main page and get a cookie.
declare PINCMD1=$(cat << EOF
curl -o ret1.html -D h1.txt -c c1.txt -H 'Host: www.pinterest.com' -H '${UA_HEADER}' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'Connection: keep-alive' 'https://www.pinterest.com/'
EOF
)
## Start a search for our dear search terms.
declare PINCMD2=$(cat << EOF
curl -H 'X-APP-VERSION: ea7a93a' -o ret2.html -D h2.txt -c c1.txt -H 'Host: www.pinterest.com' -H '${UA_HEADER}' -H 'Accept: application/json, text/javascript, */*; q=0.01' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'X-Pinterest-AppState: active' -H 'X-NEW-APP: 1' -H 'X-Requested-With: XMLHttpRequest' -H 'Referer: https://www.pinterest.com' -H 'Connection: keep-alive' 'https://www.pinterest.com/resource/BaseSearchResource/get/?source_url=%2Fsearch%2Fpins%2F%3Fq%3DSEARCHTOKEN1%2520SEARCHTOKEN2%26rs%3Dtyped%260%3DSEARCHTOKEN1%257Ctyped%261%3DSEARCHTOKEN2%257Ctyped&data=%7B%22options%22%3A%7B%22restrict%22%3Anull%2C%22scope%22%3A%22pins%22%2C%22constraint_string%22%3Anull%2C%22show_scope_selector%22%3Atrue%2C%22query%22%3A%22SEARCHTOKEN1+SEARCHTOKEN2%22%7D%2C%22context%22%3A%7B%7D%2C%22module%22%3A%7B%22name%22%3A%22SearchPage%22%2C%22options%22%3A%7B%22restrict%22%3Anull%2C%22scope%22%3A%22pins%22%2C%22constraint_string%22%3Anull%2C%22show_scope_selector%22%3Atrue%2C%22query%22%3A%22SEARCHTOKEN1+SEARCHTOKEN2%22%7D%7D%2C%22render_type%22%3A1%2C%22error_strategy%22%3A0%7D&module_path=App%3EHeader%3ESearchForm%3ETypeaheadField(support_guided_search%3Dtrue%2C+resource_name%3DAdvancedTypeaheadResource%2C+tags%3Dautocomplete%2C+class_name%3DbuttonOnRight%2C+prefetch_on_focus%3Dtrue%2C+support_advanced_typeahead%3Dnull%2C+hide_tokens_on_focus%3Dundefined%2C+search_on_focus%3Dtrue%2C+placeholder%3DSearch%2C+show_remove_all%3Dtrue%2C+enable_recent_queries%3Dtrue%2C+name%3Dq%2C+view_type%3Dguided%2C+value%3D%22%22%2C+input_log_element_type%3D227%2C+populate_on_result_highlight%3Dtrue%2C+search_delay%3D0%2C+is_multiobject_search%3Dtrue%2C+type%3Dtokenized%2C+enable_overlay%3Dtrue)&_=1454779874891'
EOF
)
## Load further images.
declare PINCMD3=$(cat << EOF
curl -H 'X-APP-VERSION: ea7a93a' -D h3.txt -c c1.txt -H 'Host: www.pinterest.com' -H '${UA_HEADER}' -H 'Accept: application/json, text/javascript, */*; q=0.01' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'X-Pinterest-AppState: active' -H 'X-NEW-APP: 1' -H 'X-Requested-With: XMLHttpRequest' -H 'Referer: https://www.pinterest.com' -H 'Connection: keep-alive' 'https://www.pinterest.com/resource/SearchResource/get/?source_url=%2Fsearch%2Fpins%2F%3Fq%3DSEARCHTOKEN1%2520SEARCHTOKEN2%26rs%3Dtyped%260%3DSEARCHTOKEN1%257Ctyped%261%3DSEARCHTOKEN2%257Ctyped&data=%7B%22options%22%3A%7B%22layout%22%3Anull%2C%22places%22%3Afalse%2C%22constraint_string%22%3Anull%2C%22show_scope_selector%22%3Atrue%2C%22query%22%3A%22SEARCHTOKEN1+SEARCHTOKEN2%22%2C%22scope%22%3A%22pins%22%2C%22bookmarks%22%3A%5B%22_NEW_BOOK_MARK_%22%5D%7D%2C%22context%22%3A%7B%7D%7D&module_path=App%3EHeader%3ESearchForm%3ETypeaheadField(support_guided_search%3Dtrue%2C+resource_name%3DAdvancedTypeaheadResource%2C+tags%3Dautocomplete%2C+class_name%3DbuttonOnRight%2C+prefetch_on_focus%3Dtrue%2C+support_advanced_typeahead%3Dnull%2C+hide_tokens_on_focus%3Dundefined%2C+search_on_focus%3Dtrue%2C+placeholder%3DSearch%2C+show_remove_all%3Dtrue%2C+enable_recent_queries%3Dtrue%2C+name%3Dq%2C+view_type%3Dguided%2C+value%3D%22%22%2C+input_log_element_type%3D227%2C+populate_on_result_highlight%3Dtrue%2C+search_delay%3D0%2C+is_multiobject_search%3Dtrue%2C+type%3Dtokenized%2C+enable_overlay%3Dtrue)&_=1454779874911'
EOF
)
## Exactly 2 search terms in a single string are expected, you can hack it up if
## you want something else.
declare SEARCHTOKEN1=$(echo "${1}" | cut -d " " -f1)
declare SEARCHTOKEN2=$(echo "${1}" | cut -d " " -f2)
PINCMD3=$(sed "s/SEARCHTOKEN1/${SEARCHTOKEN1}/g" <<< "${PINCMD3}")
PINCMD3=$(sed "s/SEARCHTOKEN2/${SEARCHTOKEN2}/g" <<< "${PINCMD3}")
PINCMD2=$(sed "s/SEARCHTOKEN1/${SEARCHTOKEN1}/g" <<< "${PINCMD2}")
PINCMD2=$(sed "s/SEARCHTOKEN2/${SEARCHTOKEN2}/g" <<< "${PINCMD2}")
function lspinimgs() { grep -o "\"url\": \"http[s]*://[^\"]*.pinimg.com[^\"]*.jpg\"" "${1}" | cut -d " " -f2 | tr -d "\""; }
function mkpinorig() { sed "s#\(^http.*\)\(com/\)\([^/]*\)\(/.*jpg\$\)#\1\2originals\4#g" "${1}" > "${2}"; }
function getpinbm() { grep -o "bookmarks\": [^ ]* " "${1}" | sed "s/^book.*\[\"//g;s/\"\].*\$//g" | sort | uniq | grep -v "-end-"; }
function changepinbm() { PINCMD3=$(sed "s/\(^.*\)\(bookmarks%22%3A%5B%22\)\(.*\)\(%22%5D.*\$\)/\1\2${1}\4/g" <<< "${PINCMD3}"); }
function cleanup() { rm ret*html c1.txt "${TMP_IMGS}" h{1..3}.txt "${ORIG_IMGS}"; }
function main() {
eval "${PINCMD1}"
eval "${PINCMD2}"
for ((i=3,lasti=2; i<10000; i++,lasti++)); do
pinbm=$(getpinbm "ret${lasti}.html")
[[ -z "${pinbm}" ]] && break
changepinbm "${pinbm}"
eval "${PINCMD3}" > "ret${i}.html"
done
for a in *.html; do lspinimgs "${a}" >> "${TMP_IMGS}"; done
mkpinorig "${TMP_IMGS}" "${ORIG_IMGS}"
IFS=$(echo -en "\n\b") && for a in $(sort "${ORIG_IMGS}" | uniq); do
wget --tries=3 -E -e robots=off -nc --random-wait --content-disposition --no-check-certificate -p --restrict-file-names=windows,lowercase,ascii --header "${UA_HEADER}" -nd "$a"
done
cleanup
}
main
exit 0