利用libcurl下载图片

非 Y 不嫁゛ 提交于 2020-01-23 02:38:37
//Crawl.cpp

#include "Crawl.h"


using namespace std;
 
// Default constructor: nothing to set up explicitly; the members are
// default-constructed.
CCrawl::CCrawl() {}

// Destructor: no manual cleanup is performed here.
CCrawl::~CCrawl() {}

/*
 * libcurl data callback: appends the delivered bytes to the MemoryStruct
 * passed through `userp`, growing its heap buffer with realloc().
 *
 * contents    - bytes delivered for this call
 * size, nmemb - element size and count; the byte count is size * nmemb
 * userp       - pointer to the MemoryStruct that accumulates the data
 *
 * Returns the number of bytes appended, or 0 when the buffer could not be
 * grown (libcurl treats a count that differs from the delivered size as a
 * write error). The buffer is kept NUL-terminated after every append.
 */
size_t  CCrawl::WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
{
	const size_t incoming = size * nmemb;
	struct MemoryStruct *buf = static_cast<struct MemoryStruct *>(userp);

	/* one extra byte so the trailing NUL always fits */
	char *grown = static_cast<char *>(realloc(buf->memory, buf->size + incoming + 1));
	if(!grown) {
		/* out of memory: the previous block is left as it was */
		printf("not enough memory (realloc returned NULL)\n");
		return 0;
	}

	buf->memory = grown;
	memcpy(grown + buf->size, contents, incoming);
	buf->size += incoming;
	grown[buf->size] = 0;

	return incoming;
}
 

/*
 * Download the resource at strUrl into a heap buffer.
 *
 * strUrl  - URL to retrieve.
 * fileBuf - out: on success receives the response body. The buffer is
 *           allocated with malloc()/realloc() and is owned by the caller,
 *           who must release it with free(). Set to NULL on failure.
 * imgSize - out: number of body bytes in *fileBuf. Set to 0 on failure.
 *
 * Returns 0 on success and 1 on any failure (bad argument, libcurl handle
 * could not be created, or the transfer failed).
 *
 * Global libcurl initialisation/cleanup is not done here; it is left to
 * the caller.
 */
int CCrawl::fetch(string strUrl, char **fileBuf , size_t &imgSize)
{
	if(fileBuf == NULL) {
		imgSize = 0;
		return 1;
	}

	/* give the caller well-defined outputs on every failure path */
	*fileBuf = NULL;
	imgSize = 0;

	/* init the curl session */
	CURL *curl = curl_easy_init();
	if(curl == NULL) {
		fprintf(stderr, "curl_easy_init() failed\n");
		return 1;
	}

	struct MemoryStruct chunk;      /* collects the response headers */
	struct MemoryStruct DataChunk;  /* collects the response body */

	chunk.memory = (char *)malloc(1);  /* will be grown as needed by realloc in the callback */
	chunk.size = 0;                    /* no data at this point */

	DataChunk.memory = (char *)malloc(1);
	DataChunk.size = 0;

	/* specify URL to get */
	curl_easy_setopt(curl, CURLOPT_URL, strUrl.c_str());

	/* complete within 20 seconds */
	curl_easy_setopt(curl, CURLOPT_TIMEOUT, 20L);

	/* headers and body go through the same callback, into separate buffers */
	curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, WriteMemoryCallback);
	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);

	/* CURLOPT_HEADERDATA is the current name of the old CURLOPT_WRITEHEADER */
	curl_easy_setopt(curl, CURLOPT_HEADERDATA, (void *)&chunk);
	curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&DataChunk);

	/* some servers don't like requests that are made without a user-agent field, so we provide one */
	curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0");

	/* get it! */
	const CURLcode res = curl_easy_perform(curl);

	/* the handle and the header buffer are no longer needed on any path */
	curl_easy_cleanup(curl);
	free(chunk.memory);

	if(res != CURLE_OK) {
		fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
		free(DataChunk.memory);
		return 1;
	}

	/* hand the body buffer over to the caller */
	imgSize = DataChunk.size;
	*fileBuf = DataChunk.memory;

	return 0;
}


//See more: https://blog.51cto.com/fengyuzaitu/2434920

//_________________________________________________

/*
 * libcurl write callback that forwards the delivered bytes to a
 * std::ofstream.
 *
 * ptr         - bytes delivered for this call
 * size, nmemb - element size and count; the byte count is size * nmemb
 * stream      - pointer to the std::ofstream to write to
 *
 * Returns the byte count handed in; the stream state is not inspected.
 */
size_t CCrawl::WriteFile(void *ptr, size_t size, size_t nmemb, void *stream)
{
	const size_t byteCount = size * nmemb;
	std::ofstream &out = *static_cast<std::ofstream *>(stream);
	out.write(static_cast<const char *>(ptr), byteCount);
	return byteCount;
}

void CCrawl::TestStorePhotoFileFromUrl(std::string strUrl)
{
	std::ofstream ofs;

	ofs.open("img.jpg", std::ios::out | std::ios::binary);

	std::string strPhotoBuffer;

	CURL *pCurlHandle;

	pCurlHandle = curl_easy_init();

	curl_easy_setopt(pCurlHandle, CURLOPT_URL, strUrl.c_str());
	curl_easy_setopt(pCurlHandle, CURLOPT_WRITEDATA, &ofs);
	curl_easy_setopt(pCurlHandle, CURLOPT_WRITEFUNCTION, WriteFile);

	CURLcode nCurlRet = curl_easy_perform(pCurlHandle);

	if ((nCurlRet != CURLE_OK) && (nCurlRet != CURLE_WRITE_ERROR)){
		std::cout << "通过LibCurl获取:" << strUrl << "图片失败,错误码是:" << nCurlRet;
	}

	ofs.close();
	curl_easy_cleanup(pCurlHandle);
}

//store to memory

/*
 * libcurl write callback that appends the delivered bytes to a
 * std::string.
 *
 * ptr         - bytes delivered for this call
 * size, nmemb - element size and count; the byte count is size * nmemb
 * stream      - pointer to the std::string that accumulates the data
 *
 * Returns the number of bytes appended.
 */
size_t CCrawl::WriteBuffer(void *ptr, size_t size, size_t nmemb, void *stream)
{
	const size_t byteCount = size * nmemb;
	std::string &sink = *static_cast<std::string *>(stream);
	sink.append(static_cast<const char *>(ptr), byteCount);
	return byteCount;
}

void CCrawl::TestStoreBufferFromUrl(std::string strUrl)
{

	std::string strPhotoBuffer;
	CURL *pCurlHandle;
	pCurlHandle = curl_easy_init();
	curl_easy_setopt(pCurlHandle, CURLOPT_URL, strUrl.c_str());
	curl_easy_setopt(pCurlHandle, CURLOPT_WRITEDATA, &strPhotoBuffer);
	curl_easy_setopt(pCurlHandle, CURLOPT_WRITEFUNCTION, WriteBuffer);
	CURLcode nCurlRet = curl_easy_perform(pCurlHandle);
	if ((nCurlRet != CURLE_OK) && (nCurlRet != CURLE_WRITE_ERROR)){
		std::cout << "通过LibCurl获取:" << strUrl << "图片失败,错误码是:" << nCurlRet;
	}else{
		std::ofstream ofs;
		ofs.open("img2.jpg", std::ios::out | std::ios::binary);
		ofs << strPhotoBuffer;
		ofs.close();
	}
		curl_easy_cleanup(pCurlHandle);
}


/*
 * Load image URLs from a seed file into setImgUrl.
 *
 * InputFile - path of the seed file, one entry per line.
 *
 * At most 1000 lines are read. Lines containing "Root:" are skipped;
 * every other line is inserted into setImgUrl.
 *
 * Returns 0 on success, 1 if the file could not be opened.
 */
int CCrawl::AddUrl(string InputFile)
{
	ifstream seedFile(InputFile.c_str());
	if (!seedFile)
		return 1;

	string line;
	int linesRead = 0;

	while (linesRead < 1000 && getline(seedFile, line)) {
		++linesRead;

		// "Root:" lines are markers, not URLs to store
		if (line.find("Root:") == string::npos)
			setImgUrl.insert(line);
	}

	seedFile.close();
	return 0;
}

//Crawl.h

#ifndef _Crawl_H_191220_
#define _Crawl_H_191220_

#include <string>
#include <fstream>
#include <iostream>
#include <stdio.h>

#include <stdlib.h>

#include <stddef.h>
#include <unistd.h>

#include <string>
#include <set>

#include <string.h>

#include <algorithm>
#include <sstream>
#include <sys/io.h>
#include <fcntl.h>

#include <sys/wait.h>

#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/types.h>  
#include <sys/ipc.h> 
#include <sys/sem.h>  
#include <sys/msg.h>

#include <errno.h>
#include <signal.h>

#include <curl/curl.h>


// Kept at header scope: micSky.cpp uses unqualified standard names
// (string, ifstream, cerr, ...) and relies on this directive.
using namespace std;

/*
 * Small libcurl-based downloader: fetches a URL into memory or to a file.
 */
class CCrawl
{
public:
	// Text buffer exposed to callers; not written by the code in Crawl.cpp.
	std::string HeaderInf;

	CCrawl();
	~CCrawl();

	// Downloads strUrl; on success *fileBuf receives the body buffer and
	// nmemb its size in bytes. Returns 0 on success, 1 on failure.
	int fetch(std::string strUrl, char **fileBuf, size_t &nmemb );

	// Demo: stream the body of strUrl into the file "img.jpg".
	static void TestStorePhotoFileFromUrl(std::string strUrl);

	// Demo: download strUrl into memory, then write it to "img2.jpg".
	static void TestStoreBufferFromUrl(std::string strUrl);

private:
	// URLs collected by AddUrl().
	std::set<std::string> setImgUrl;

	// Growable heap buffer filled by WriteMemoryCallback().
	struct MemoryStruct {
		char *memory;   // start of the buffer
		size_t size;    // number of bytes stored
	};

	// Reads URLs from InputFile into setImgUrl; 0 on success, 1 on open failure.
	int AddUrl(std::string InputFile);

	// libcurl callbacks; each receives size * nmemb bytes per call.
	static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp);
	// NOTE(review): declared only; no definition is visible in Crawl.cpp.
	static size_t save_header(void *ptr, size_t size, size_t nmemb, FILE *fp);
	static size_t WriteFile(void *ptr, size_t size, size_t nmemb, void *stream);
	static size_t WriteBuffer(void *ptr, size_t size, size_t nmemb, void *stream);
};
#endif
//micSky.cpp

#include <string>
#include <fstream>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "Crawl.h"
 
//https://curl.haxx.se/libcurl/c/getinmemory.html

int main(int arc, char* arv[])
{

	size_t imgSize ;

	char *fileHead = NULL;

	CCrawl iCrawl;

	string strUrl;

	curl_global_init(CURL_GLOBAL_ALL);


	// open the seed url file
	ifstream ifsSeed("imgUrl");
	if (!ifsSeed){
		exit(1);
	} 

	string::size_type idx;

	static long Init = 90000;

	long int i=0;
	for(;;i++){//each time read a group of lines.

		if ( !getline(ifsSeed,strUrl) )
			break;

		if(i<Init) continue;


		if(((idx = strUrl.find("Root:")) != string::npos)){
			if( i> Init + 1000) break;
			continue;
		}

		string dom;
		string::size_type idx;

		if( (idx = strUrl.rfind(".")) != string::npos ){
			dom = strUrl.substr(idx);
		}else // impossible .
			cerr << "Error 1; " << endl;


		int bet = iCrawl.fetch(strUrl, &fileHead, imgSize );

		if(imgSize<500) continue;

		if(bet!=1){


			char food[128];
			sprintf(food,"Img%d%s",i,dom.c_str());



/***
		std::ofstream ofs;
		ofs.open(food, std::ios::out | std::ios::binary);
		ofs << fileHead;
		ofs.close();

//*/

//*

			FILE *fp;

			fp=fopen(food,"wb");

			if(!fp){
			//	printf("/Spider/imgDown Error: can not open the file.\n");
				cerr << "Error 2; can not open the file.." << strUrl << endl;
				exit(1);
			}

			int ret=fwrite(fileHead,imgSize,1,fp);

			if(ret!=1){
				cerr << "Error 3; can not write the pixel data..  " <<  i << " ).  " << strUrl <<  endl;
			}

			fclose(fp);
//*/

		}else
			cerr << "Error 4; can not fetch the net page..  " <<  i << " ).  " << strUrl <<  endl;


		cout << i << " ).  " << strUrl << " imgSize : "<< imgSize <<endl;


		if (fileHead){
			free(fileHead); fileHead=NULL;
			imgSize = 0;
		}

	}//_for

	ifsSeed.close();

	curl_global_cleanup();


	exit(0);


	iCrawl.TestStorePhotoFileFromUrl(strUrl);
	iCrawl.TestStoreBufferFromUrl(strUrl);


	exit(0);
}





#CMakeLists.txt

# Build script for the "Sky" executable: compiles every source file found in
# this directory and links the result against libcurl.
cmake_minimum_required(VERSION 2.8)

project( Sky )

find_package(CURL REQUIRED)

# Use the result variables documented by FindCURL (CURL_INCLUDE_DIRS /
# CURL_LIBRARIES) rather than its cache variables (CURL_INCLUDE_DIR /
# CURL_LIBRARY), which are not guaranteed to be set on every search path.
include_directories(${CURL_INCLUDE_DIRS})


# Collect all source files of the current directory.
aux_source_directory(. SRC_LIST)

add_executable(${PROJECT_NAME} ${SRC_LIST})

target_link_libraries(${PROJECT_NAME} ${CURL_LIBRARIES})


# Enable C++11, falling back to the older -std=c++0x spelling.
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX11)
CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X)

if(COMPILER_SUPPORTS_CXX11)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
elseif(COMPILER_SUPPORTS_CXX0X)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
else()
     message(STATUS "The compiler ${CMAKE_CXX_COMPILER} has no C++11 support. Please use a different C++ compiler.")
endif()

The result is satisfactory …

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!