I am explaining the scenario with a piece of data:
Ex. data set.
GA_ID PN_ID PC_ID MBP_ID GR_ID AP_ID class
0.033 6.652 6.681 0.194
Perhaps this code and commented explanations will help (adapted from here).
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier
# Build a classification task using 3 informative features
X, y = make_classification(n_samples=1000,
n_features=10,
n_informative=3,
n_redundant=0,
n_repeated=0,
n_classes=2,
random_state=0,
shuffle=False)
# Build a forest and compute the feature importances
forest = ExtraTreesClassifier(n_estimators=250,
random_state=0)
forest.fit(X, y)
importances = forest.feature_importances_ #array with importances of each feature
idx = np.arange(0, X.shape[1]) #create an index array, with the number of features
features_to_keep = idx[importances > np.mean(importances)] #only keep features whose importance is greater than the mean importance
#should be about an array of size 3 (about)
print features_to_keep.shape
x_feature_selected = X[:,features_to_keep] #pull X values corresponding to the most important features
print x_feature_selected