问题
I used the following code to connect Flink to ElasticSearch. But when running with Flink, a lot of errors are displayed.The program first enters the data from a port and then reads each line in the command line according to the program written. It then displays the number of words. The main problem is when connecting to a elasticsearch that unfortunately gives error when connecting. Are these errors? What classes do you need to connect Minimal Flink to Elastic Search?
public class Elastic {
public static void main(String[] args) throws Exception {
// the port to connect to
final int port;
try {
final ParameterTool params = ParameterTool.fromArgs(args);
port = params.getInt("port");
} catch (Exception e) {
System.err.println("No port specified. Please run 'SocketWindowWordCount --port <port>'");
return;
}
// get the execution environment
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// get input data by connecting to the socket
DataStream<String> text = env.socketTextStream("localhost", port, "\n");
// parse the data, group it, window it, and aggregate the counts
DataStream<WordWithCount> windowCounts = text
.flatMap(new FlatMapFunction<String, WordWithCount>() {
@Override
public void flatMap(String value, Collector<WordWithCount> out) {
for (String word : value.split("\\s")) {
out.collect(new WordWithCount(word, 1L));
}
}
})
.keyBy("word")
.timeWindow(Time.seconds(5), Time.seconds(1))
.reduce(new ReduceFunction<WordWithCount>() {
@Override
public WordWithCount reduce(WordWithCount a, WordWithCount b) {
return new WordWithCount(a.word, a.count + b.count);
}
});
// print the results with a single thread, rather than in parallel
windowCounts.print().setParallelism(1);
text.print().setParallelism(1);
env.execute("Socket Window WordCount");
List<HttpHost> httpHosts = new ArrayList<HttpHost>();
httpHosts.add(new HttpHost("127.0.0.1", 9200, "http"));
httpHosts.add(new HttpHost("10.2.3.1", 9200, "http"));
httpHosts.add(new HttpHost("my-ip",9200,"http"));
ElasticsearchSink.Builder<String> esSinkBuilder = new ElasticsearchSink.Builder<String>(
httpHosts,
new ElasticsearchSinkFunction<String>() {
public IndexRequest createIndexRequest(String element) {
Map<String, String> json = new HashMap<String, String>();
json.put("data", element);
return Requests.indexRequest()
.index("iran")
.type("int")
.source(json);
}
@Override
public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {
indexer.add(createIndexRequest(element));
}
}
);
esSinkBuilder.setBulkFlushMaxActions(1);
final Header[] defaultHeaders = new Header[]{new BasicHeader("header", "value")};
esSinkBuilder.setRestClientFactory(new RestClientFactory() {
@Override
public void configureRestClientBuilder(RestClientBuilder restClientBuilder) {
restClientBuilder.setDefaultHeaders(defaultHeaders)
.setMaxRetryTimeoutMillis(10000)
.setPathPrefix("a")
.setRequestConfigCallback(new RestClientBuilder.RequestConfigCallback() {
@Override
public RequestConfig.Builder customizeRequestConfig(RequestConfig.Builder builder) {
return builder.setSocketTimeout(10000);
}
});
}
});
text.addSink(esSinkBuilder.build());
}
// Data type for words with count
public static class WordWithCount {
public String word;
public long count;
public WordWithCount() {
}
public WordWithCount(String word, long count) {
this.word = word;
this.count = count;
}
@Override
public String toString() {
return word + " : " + count;
}
}
}
my elasticsearch version: 7.5.0 my flink version: 1.8.3
my error:
sudo /etc/flink-1.8.3/bin/flink run -c org.apache.flink.Elastic /root/FlinkElastic-1.0.jar --port 9000
------------------------------------------------------------
The program finished with the following exception:
java.lang.RuntimeException: Could not look up the main(String[]) method from the class
org.apache.flink.Elastic:
org/apache/flink/streaming/connectors/elasticsearch/ElasticsearchSinkFunction
at org.apache.flink.client.program.PackagedProgram.hasMainMethod(PackagedProgram.java:527)
at org.apache.flink.client.program.PackagedProgram.<init>(PackagedProgram.java:246)
... 7 more
Caused by: java.lang.NoClassDefFoundError:
org/apache/flink/streaming/connectors/elasticsearch/ElasticsearchSinkFunction
at java.lang.Class.getDeclaredMethods0(Native Method)
at java.lang.Class.privateGetDeclaredMethods(Class.java:2701)
at java.lang.Class.privateGetMethodRecursive(Class.java:3048)
at org.apache.flink.client.program.PackagedProgram.hasMainMethod(PackagedProgram.java:521)
... 7 more
Caused by: java.lang.ClassNotFoundException:
org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoaders$ChildFirstClassLoader.loadClass(FlinkUserCodeClassLoaders.java:120)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
... 13 more
my pom:
<groupId>org.apache.flink</groupId>
<artifactId>FlinkElastic</artifactId>
<version>1.0</version>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.6.1</version>
<configuration>
<source>6</source>
<target>6</target>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-elasticsearch6_2.11</artifactId>
<version>1.8.3</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.8.3</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.11</artifactId>
<version>1.8.3</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.11</artifactId>
<version>1.8.3</version>
</dependency>
</dependencies>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
回答1:
A couple of things:
Flink doesn't yet support Elasticsearch 7. An ES7 connector will be released along with Flink 1.10.
You must include the flink/elasticsearch dependency in your project -- this error suggests you haven't included it:
ClassNotFoundException: org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction
See the elasticsearch docs for more info.
Your Flink application code runs in the task managers. Each task manager must be able to find all of your application's dependencies in its CLASSPATH. The connector classes are not included out-of-the-box, so you will need to either build an uber jar (i.e., a fat jar, or jar with dependencies), or copy the flink-connector-elasticsearch6_2.11 jar file into the lib directory of every machine in the cluster. See the docs on connector dependencies for more details.
回答2:
Please find the Flink Elastic Connector code here. I have used the following dependencies and versions mentioned below.
- Flink: 1.10.0
- ElasticSearch: 7.6.2
- flink-connector-elasticsearch7
- Scala: 2.12.11
- SBT: 1.2.8
- Java: 11.0.4
Point to be noted here:
- Since ElasticSearch 6.x onwards they started full support of the REST elastic client. And till Elastic5.x they were using Transport elastic client.
1. Flink DataStream
val inputStream: DataStream[(String, String)] = ...
ESSinkService.sinkToES(inputStream, index)
2. ElastiSearchSink Function
package demo.elastic
import org.apache.flink.streaming.api.scala._
import org.apache.log4j._
import org.apache.flink.api.common.functions.RuntimeContext
import org.apache.flink.streaming.connectors.elasticsearch7.{ElasticsearchSink, RestClientFactory}
import org.apache.flink.streaming.connectors.elasticsearch.{ActionRequestFailureHandler, ElasticsearchSinkFunction, RequestIndexer}
import org.apache.http.HttpHost
import org.elasticsearch.client.{Requests, RestClientBuilder}
import org.elasticsearch.common.xcontent.XContentType
import org.elasticsearch.action.ActionRequest
import org.apache.flink.streaming.api.datastream.DataStreamSink
class ESSinkService {
val logger = Logger.getLogger(getClass.getName)
val httpHosts = new java.util.ArrayList[HttpHost]
httpHosts.add(new HttpHost("localhost", 9200, "http"))
httpHosts.add(new HttpHost("localhost", 9200, "http"))
def sinkToES(counted: DataStream[(String, String)], index: String): DataStreamSink[(String, String)] = {
val esSinkBuilder = new ElasticsearchSink.Builder[(String, String)](
httpHosts, new ElasticsearchSinkFunction[(String, String)] {
def process(element: (String, String), ctx: RuntimeContext, indexer: RequestIndexer) {
indexer.add(Requests.indexRequest
.index(element._2 + "_" + index)
.source(element._1, XContentType.JSON))
}
}
)
esSinkBuilder.setBulkFlushMaxActions(2)
esSinkBuilder.setBulkFlushInterval(1000L)
esSinkBuilder.setFailureHandler(new ActionRequestFailureHandler {
override def onFailure(actionRequest: ActionRequest, throwable: Throwable, i: Int, requestIndexer: RequestIndexer): Unit = {
println("@@@@@@@On failure from ElasticsearchSink:-->" + throwable.getMessage)
}
})
esSinkBuilder.setRestClientFactory(new RestClientFactory {
override def configureRestClientBuilder(restClientBuilder: RestClientBuilder): Unit = {
/*restClientBuilder.setDefaultHeaders(...)
restClientBuilder.setMaxRetryTimeoutMillis(...)
restClientBuilder.setPathPrefix(...)
restClientBuilder.setHttpClientConfigCallback(...)*/
}
})
counted.addSink(esSinkBuilder.build())
}
}
object ESSinkService extends ESSinkService
Note: For more details click here.
来源:https://stackoverflow.com/questions/59453477/flink-elasticsearch-connector