I want to add some kind of "spheres" around my data clusters.
My data clusters look like this; they do not have "spheres".

And here is my code:
# K-means clustering of daily energy consumption, drawn as one colour per
# cluster with the cluster centres marked by an "x".
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import style
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder

style.use('ggplot')

my_file = 'total_watt.csv'

# The first CSV column is parsed as dates and used as the index.
df = pd.read_csv(my_file, parse_dates=[0], index_col=[0])
# `resample(..., how='sum')` is no longer accepted by pandas; aggregate with
# `.sum()` instead.  `min_count=1` keeps days without any reading as NaN so
# that the following `dropna()` still removes them.
df = df.resample('1D').sum(min_count=1)
df = df.dropna()

# Encode each day label as an integer so it can serve as the x coordinate.
date = [stamp.strftime('%y-%m-%d') for stamp in df.index]
encoder = LabelEncoder()
date_numeric = encoder.fit_transform(date)
consumption = df[df.columns[0]].values

# One row per day: (encoded date, consumption of the first data column).
x = np.array([date_numeric, consumption]).T

kmeans = KMeans(n_clusters=3)
kmeans.fit(x)

centroids = kmeans.cluster_centers_
labels = kmeans.labels_

print(centroids)
print(labels)

fig, ax = plt.subplots(figsize=(10, 8))
rect = fig.patch
rect.set_facecolor('#2d2b2b')

# One matplotlib format string per cluster label (0, 1, 2).
colors = ["b.", "r.", "g."]

for point, label in zip(x, labels):
    # inverse_transform expects a 1-D sequence, so wrap the single code in a
    # list and unwrap the single result.
    day = encoder.inverse_transform([int(point[0])])[0]
    print("coordinate:", day, point[1], "label:", label)
    ax.plot(point[0], point[1], colors[label], markersize=10)

ax.scatter(centroids[:, 0], centroids[:, 1],
           marker="x", s=150, linewidths=5, zorder=10)

# Label every fifth day on the x axis with its original date string.
a = np.arange(0, len(x), 5)
ax.set_xticks(a)
ax.set_xticklabels(encoder.inverse_transform(a.astype(int)))
ax.tick_params(axis='x', colors='lightseagreen')
ax.tick_params(axis='y', colors='lightseagreen')

# Draws onto the current axes a second time, in black, over the centroid
# markers added above.
plt.scatter(centroids[:, 0], centroids[:, 1],
            marker="x", s=100, c="black", linewidths=5, zorder=10)

ax.set_title('energy consumptions clusters (high/medium/low)', color='gold')
ax.set_xlabel('time', color='gold')
ax.set_ylabel('date(year 2011)', color='gold')

plt.show()

# The "spheres" are the areas surrounding each cluster in the plot, as in the
# picture.

I tried to Google it,
but when I type "matplotlib spheres", I get no results.
The sample graph in your post looks like the result of a generalized Gaussian mixture, where each sphere is a Gaussian 2-D density.
I'll write some sample code shortly to demonstrate how to use GMM on your dataset and produce this kind of plot.
# Gaussian-mixture clustering of daily energy consumption.  Each component is
# drawn as a translucent ellipse (the "sphere") behind its data points.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import style
from matplotlib.patches import Ellipse
# code changes here
# ===========================================
# `sklearn.mixture.GMM` no longer exists; `GaussianMixture` is its replacement.
from sklearn.mixture import GaussianMixture
# ===========================================
from sklearn.preprocessing import LabelEncoder

style.use('ggplot')

# replace this with your own file path
my_file = '/home/jian/downloads/total_watt.csv'

# The first CSV column is parsed as dates and used as the index.
df = pd.read_csv(my_file, parse_dates=[0], index_col=[0])
# `resample(..., how='sum')` is no longer accepted by pandas; aggregate with
# `.sum()` instead.  `min_count=1` keeps days without any reading as NaN so
# that the following `dropna()` still removes them.
df = df.resample('1D').sum(min_count=1)
df = df.dropna()

# Encode each day label as an integer so it can serve as the x coordinate.
date = [stamp.strftime('%y-%m-%d') for stamp in df.index]
encoder = LabelEncoder()
date_numeric = encoder.fit_transform(date)
consumption = df[df.columns[0]].values

# One row per day: (encoded date, consumption of the first data column).
x = np.array([date_numeric, consumption]).T

# code changes here
# ===========================================
# 'diag' matches the default of the old GMM class and yields per-axis
# variances, which is what the axis-aligned ellipses below need.
gmm = GaussianMixture(n_components=3, covariance_type='diag', random_state=0)
gmm.fit(x)
y_pred = gmm.predict(x)
# the centre of each component is given by its mean
gmm.means_
# ===========================================

fig, ax = plt.subplots(figsize=(10, 8))

for i, color in enumerate('rgb'):
    # sphere background: full width/height of 2 * 1.96 standard deviations
    # along each axis.  With covariance_type='diag', covariances_[i] already
    # holds the per-axis variances of component i.
    width, height = 2 * 1.96 * np.sqrt(gmm.covariances_[i])
    ell = Ellipse(gmm.means_[i], width, height, color=color)
    ell.set_alpha(0.1)
    ax.add_artist(ell)
    # data points assigned to this component
    x_data = x[y_pred == i]
    ax.scatter(x_data[:, 0], x_data[:, 1], color=color)
    # centre
    ax.scatter(gmm.means_[i][0], gmm.means_[i][1], marker='x', s=100, c=color)

ax.set_title('energy consumptions clusters (high/medium/low)', color='gold')
ax.set_xlabel('time', color='gold')
ax.set_ylabel('date(year 2011)', color='gold')

# Label every fifth day on the x axis with its original date string.
a = np.arange(0, len(x), 5)
ax.set_xticks(a)
ax.set_xticklabels(encoder.inverse_transform(a.astype(int)))
ax.tick_params(axis='x', colors='lightseagreen')
ax.tick_params(axis='y', colors='lightseagreen')
Comments
Post a Comment