问题
I have data in Big Query I want to run analytics on in a spark cluster. Per documentation if I instantiate a spark cluster it should come with a Big Query connector. I was looking for any sample code to do this, found one in pyspark. Could not find any c# examples. Also found some documentation on the functions in DataProc APIs nuget package.
Looking for a sample to start a spark cluster in Google cloud using c#.
回答1:
After installing Google.Apis.Dataproc.v1 version 1.10.0.40 (or higher):
Below is a quick sample console app for creating a Dataproc cluster in C#:
using Google.Apis.Auth.OAuth2;
using Google.Apis.Services;
using Google.Apis.Dataproc.v1;
using Google.Apis.Dataproc.v1.Data;
using System;
using System.Threading;
namespace DataprocSample {
class Program
{
static void Main(string[] args)
{
string project = "YOUR PROJECT HERE";
string dataprocGlobalRegion = "global";
string zone = "us-east1-b";
string machineType = "n1-standard-4";
string clusterName = "sample-cluster";
int numWorkers = 2;
// See the docs for Application Default Credentials:
// https://developers.google.com/identity/protocols/application-default-credentials
// In general, a previous 'gcloud auth login' will suffice if running as yourself.
// If running from a VM, ensure the VM was started such that the service account has
// the CLOUD_PLATFORM scope.
GoogleCredential credential = GoogleCredential.GetApplicationDefaultAsync().Result;
if (credential.IsCreateScopedRequired)
{
credential = credential.CreateScoped(new[] { DataprocService.Scope.CloudPlatform });
}
DataprocService service = new DataprocService(
new BaseClientService.Initializer()
{
HttpClientInitializer = credential,
ApplicationName = "Dataproc Sample",
});
// Create a new cluster:
Cluster newCluster = new Cluster
{
ClusterName = clusterName,
Config = new ClusterConfig
{
GceClusterConfig = new GceClusterConfig
{
ZoneUri = String.Format(
"https://www.googleapis.com/compute/v1/projects/{0}/zones/{1}",
project, zone),
},
MasterConfig = new InstanceGroupConfig
{
NumInstances = 1,
MachineTypeUri = String.Format(
"https://www.googleapis.com/compute/v1/projects/{0}/zones/{1}/machineTypes/{2}",
project, zone, machineType),
},
WorkerConfig = new InstanceGroupConfig
{
NumInstances = numWorkers,
MachineTypeUri = String.Format(
"https://www.googleapis.com/compute/v1/projects/{0}/zones/{1}/machineTypes/{2}",
project, zone, machineType),
},
},
};
Operation createOperation =
service.Projects.Regions.Clusters.Create(newCluster, project, dataprocGlobalRegion).Execute();
// Poll the operation:
while (!IsDone(createOperation))
{
Console.WriteLine("Polling operation {0}", createOperation.Name);
createOperation =
service.Projects.Regions.Operations.Get(createOperation.Name).Execute();
Thread.Sleep(1000);
}
Console.WriteLine("Done creating cluster {0}", newCluster.ClusterName);
}
static bool IsDone(Operation op)
{
return op.Done ?? false;
}
}
}
来源:https://stackoverflow.com/questions/35639306/google-dataproc-api-spark-cluster-with-c-sharp