# Exploratory descriptive statistics for the Smarket.csv data set.
#
# The statistics below are written as bare expressions, so their values are
# only displayed in an interactive session or notebook; when run as a plain
# script the results are computed and discarded.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets, preprocessing
from sklearn.model_selection import learning_curve
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from pandas import read_csv

# Load the data set; the path is relative to the current working directory.
data_set = read_csv("Smarket.csv")
# All rows and all columns as a NumPy array.
data = data_set.values[:, :]

# NOTE(review): the whole-frame reductions below (np.mean / np.var / np.std on
# data_set) presumably require every column to be numeric -- confirm against
# the columns of Smarket.csv, or restrict to the numeric columns first.

# Mean (whole frame, then the "Lag1" column only)
np.mean(data_set)
np.mean(data_set["Lag1"])

# Median
np.median(data_set["Lag1"])

# Variance
np.var(data_set)

# Standard deviation
np.std(data_set)

# Range (maximum minus minimum)
np.ptp(data_set["Lag1"])

# Covariance matrix of the two columns
np.cov(data_set["Lag1"], data_set["Lag2"])

# Correlation-coefficient matrix of the two columns
np.corrcoef(data_set["Lag1"], data_set["Lag2"])

# Quantiles: first quartile, median, third quartile
q1 = data_set.quantile(0.25)
q2 = data_set.quantile(0.5)
q3 = data_set.quantile(0.75)

# Summary statistics
data_set.describe()

# Show all columns when displaying a frame
pd.set_option('display.max_columns', None)
# Show all rows when displaying a frame
# (the function is pd.set_option; pandas has no pd.set_options)
pd.set_option('display.max_rows', None)