Get all images from a board from a Pinterest web address

后端 未结 5 1264
遥遥无期
遥遥无期 2021-02-09 05:50

This question sounds easy, but it is not as simple as it sounds.

Brief summary of what\'s wrong

For an example, use this board; http://pinterest

5条回答
  •  伪装坚强ぢ
    2021-02-09 06:37

    #!/usr/bin/env bash 
    ##
    ## File: getpins.bsh 
    ## 
    ## Copyrighted by +A.M.Danischewski  2016+ (c)
    ## This program may be reutilized without limits, provided this 
    ## notice remain intact. 
    
    ## If this breaks one day, then just fire up firefox Developer Tools and check the network traffic to 
    ## capture "copy as curl" of the calls to the search page (filter with BaseSearchResource), then the 
    ## call to feed more data (filter with SearchResource). 
    ## 
    ## Do a search on whatever you want remove the cookie header, and add -o ret2.html -D h2.txt -c c1.txt, 
    ## then search replace the search terms as SEARCHTOKEN1 and SEARCHTOKEN2. 
    ## 
    ## Description this script facilitates alternate browsers, by caching images/pins 
    ## from pinterest. This script is hardwired for two search terms. First create a directory 
    ## to where you want the images to go, then cd there. 
    ##  Usage: 
    ##    $> cd /big/drive/auto_gyros 
    ##    $> getpins.bsh "sleek autogyros"
    ## 
    ## Expect around 900 images to land wherever you select, so make sure you have space! =) 
    ##
    
    declare -r ORIG_IMGS="pin_orig_imgs.txt"
    declare -r TMP_IMGS="pin_imgs.txt"
    declare -r UA_HEADER="User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:19.$(($RANDOM%10))) Gecko/20100101 Firefox/19.0"
    
     ## Say Hello to the main page and get a cookie. 
    declare PINCMD1=$(cat << EOF
    curl -o ret1.html -D h1.txt -c c1.txt -H 'Host: www.pinterest.com' -H '${UA_HEADER}' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'Connection: keep-alive' 'https://www.pinterest.com/'
    EOF
    )
     ## Start a search for our dear search terms. 
    declare PINCMD2=$(cat << EOF
    curl -H 'X-APP-VERSION: ea7a93a' -o ret2.html -D h2.txt -c c1.txt -H 'Host: www.pinterest.com' -H '${UA_HEADER}' -H 'Accept: application/json, text/javascript, */*; q=0.01' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'X-Pinterest-AppState: active' -H 'X-NEW-APP: 1'  -H 'X-Requested-With: XMLHttpRequest' -H 'Referer: https://www.pinterest.com' -H 'Connection: keep-alive' 'https://www.pinterest.com/resource/BaseSearchResource/get/?source_url=%2Fsearch%2Fpins%2F%3Fq%3DSEARCHTOKEN1%2520SEARCHTOKEN2%26rs%3Dtyped%260%3DSEARCHTOKEN1%257Ctyped%261%3DSEARCHTOKEN2%257Ctyped&data=%7B%22options%22%3A%7B%22restrict%22%3Anull%2C%22scope%22%3A%22pins%22%2C%22constraint_string%22%3Anull%2C%22show_scope_selector%22%3Atrue%2C%22query%22%3A%22SEARCHTOKEN1+SEARCHTOKEN2%22%7D%2C%22context%22%3A%7B%7D%2C%22module%22%3A%7B%22name%22%3A%22SearchPage%22%2C%22options%22%3A%7B%22restrict%22%3Anull%2C%22scope%22%3A%22pins%22%2C%22constraint_string%22%3Anull%2C%22show_scope_selector%22%3Atrue%2C%22query%22%3A%22SEARCHTOKEN1+SEARCHTOKEN2%22%7D%7D%2C%22render_type%22%3A1%2C%22error_strategy%22%3A0%7D&module_path=App%3EHeader%3ESearchForm%3ETypeaheadField(support_guided_search%3Dtrue%2C+resource_name%3DAdvancedTypeaheadResource%2C+tags%3Dautocomplete%2C+class_name%3DbuttonOnRight%2C+prefetch_on_focus%3Dtrue%2C+support_advanced_typeahead%3Dnull%2C+hide_tokens_on_focus%3Dundefined%2C+search_on_focus%3Dtrue%2C+placeholder%3DSearch%2C+show_remove_all%3Dtrue%2C+enable_recent_queries%3Dtrue%2C+name%3Dq%2C+view_type%3Dguided%2C+value%3D%22%22%2C+input_log_element_type%3D227%2C+populate_on_result_highlight%3Dtrue%2C+search_delay%3D0%2C+is_multiobject_search%3Dtrue%2C+type%3Dtokenized%2C+enable_overlay%3Dtrue)&_=1454779874891' 
    EOF
    )
     ## Load further images. 
    declare PINCMD3=$(cat << EOF
    curl -H 'X-APP-VERSION: ea7a93a' -D h3.txt -c c1.txt -H 'Host: www.pinterest.com' -H '${UA_HEADER}' -H 'Accept: application/json, text/javascript, */*; q=0.01' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'X-Pinterest-AppState: active' -H 'X-NEW-APP: 1'  -H 'X-Requested-With: XMLHttpRequest' -H 'Referer: https://www.pinterest.com' -H 'Connection: keep-alive' 'https://www.pinterest.com/resource/SearchResource/get/?source_url=%2Fsearch%2Fpins%2F%3Fq%3DSEARCHTOKEN1%2520SEARCHTOKEN2%26rs%3Dtyped%260%3DSEARCHTOKEN1%257Ctyped%261%3DSEARCHTOKEN2%257Ctyped&data=%7B%22options%22%3A%7B%22layout%22%3Anull%2C%22places%22%3Afalse%2C%22constraint_string%22%3Anull%2C%22show_scope_selector%22%3Atrue%2C%22query%22%3A%22SEARCHTOKEN1+SEARCHTOKEN2%22%2C%22scope%22%3A%22pins%22%2C%22bookmarks%22%3A%5B%22_NEW_BOOK_MARK_%22%5D%7D%2C%22context%22%3A%7B%7D%7D&module_path=App%3EHeader%3ESearchForm%3ETypeaheadField(support_guided_search%3Dtrue%2C+resource_name%3DAdvancedTypeaheadResource%2C+tags%3Dautocomplete%2C+class_name%3DbuttonOnRight%2C+prefetch_on_focus%3Dtrue%2C+support_advanced_typeahead%3Dnull%2C+hide_tokens_on_focus%3Dundefined%2C+search_on_focus%3Dtrue%2C+placeholder%3DSearch%2C+show_remove_all%3Dtrue%2C+enable_recent_queries%3Dtrue%2C+name%3Dq%2C+view_type%3Dguided%2C+value%3D%22%22%2C+input_log_element_type%3D227%2C+populate_on_result_highlight%3Dtrue%2C+search_delay%3D0%2C+is_multiobject_search%3Dtrue%2C+type%3Dtokenized%2C+enable_overlay%3Dtrue)&_=1454779874911'
    EOF
    )
     ## Exactly 2 search terms in a single string are expected, you can hack it up if 
     ## you want something else.  
    declare SEARCHTOKEN1=$(echo "${1}" | cut -d " " -f1)
    declare SEARCHTOKEN2=$(echo "${1}" | cut -d " " -f2)
    
    PINCMD3=$(sed "s/SEARCHTOKEN1/${SEARCHTOKEN1}/g" <<< "${PINCMD3}") 
    PINCMD3=$(sed "s/SEARCHTOKEN2/${SEARCHTOKEN2}/g" <<< "${PINCMD3}") 
    PINCMD2=$(sed "s/SEARCHTOKEN1/${SEARCHTOKEN1}/g" <<< "${PINCMD2}") 
    PINCMD2=$(sed "s/SEARCHTOKEN2/${SEARCHTOKEN2}/g" <<< "${PINCMD2}") 
    
    function lspinimgs() { grep -o "\"url\": \"http[s]*://[^\"]*.pinimg.com[^\"]*.jpg\"" "${1}" | cut -d " " -f2 | tr -d "\""; }
    function mkpinorig() { sed "s#\(^http.*\)\(com/\)\([^/]*\)\(/.*jpg\$\)#\1\2originals\4#g" "${1}" > "${2}"; }    
    function getpinbm() { grep -o "bookmarks\": [^ ]* "  "${1}" | sed "s/^book.*\[\"//g;s/\"\].*\$//g" | sort | uniq | grep -v "-end-"; }
    function changepinbm() { PINCMD3=$(sed "s/\(^.*\)\(bookmarks%22%3A%5B%22\)\(.*\)\(%22%5D.*\$\)/\1\2${1}\4/g" <<< "${PINCMD3}"); }
    function cleanup() { rm ret*html c1.txt "${TMP_IMGS}" h{1..3}.txt "${ORIG_IMGS}"; } 
    
    function main() { 
    eval "${PINCMD1}" 
    eval "${PINCMD2}"
    for ((i=3,lasti=2; i<10000; i++,lasti++)); do 
     pinbm=$(getpinbm "ret${lasti}.html")
     [[ -z "${pinbm}" ]] && break 
     changepinbm "${pinbm}"
     eval "${PINCMD3}" > "ret${i}.html"
    done 
    for a in *.html; do lspinimgs "${a}" >> "${TMP_IMGS}"; done
    mkpinorig "${TMP_IMGS}" "${ORIG_IMGS}"
    IFS=$(echo -en "\n\b") && for a in $(sort "${ORIG_IMGS}" | uniq); do 
     wget --tries=3 -E -e robots=off -nc --random-wait --content-disposition --no-check-certificate -p --restrict-file-names=windows,lowercase,ascii --header "${UA_HEADER}" -nd "$a"  
    done
    cleanup 
    } 
    
    main 
    exit 0
    

提交回复
热议问题