pandas - I want to add a "spheres" to my data cluster -


I want to add some kind of "spheres" to my data clusters.

My data clusters currently look like this; they do not have "spheres".

enter image description here

And here is the code:

# Cluster daily energy consumption with k-means and plot the clusters.
#
# Reads 'total_watt.csv' (first column: timestamps, second column: readings),
# sums the readings per day, runs k-means with three clusters on
# (day index, daily consumption) and draws the points coloured by cluster
# together with the cluster centroids.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder

style.use('ggplot')

my_file = 'total_watt.csv'

# Parse the first column as datetimes and use it as the index so the frame
# can be resampled by calendar day.
df = pd.read_csv(my_file, parse_dates=[0], index_col=[0])
# `resample(..., how='sum')` no longer exists in pandas; call the aggregation
# on the resampler instead. min_count=1 leaves days without any reading as
# NaN (instead of 0) so that the dropna() below still removes them.
df = df.resample('1D').sum(min_count=1)
df = df.dropna()

# NOTE(review): '%y' is the two-digit year; the original post may have used
# '%Y' -- confirm which tick-label format is wanted.
date = [stamp.strftime('%y-%m-%d') for stamp in df.index.tolist()]

# Encode each date string as an integer so it can serve as a numeric feature.
encoder = LabelEncoder()
date_numeric = encoder.fit_transform(date)
consumption = df[df.columns[0]].values

# One row per day: [encoded date, total consumption].
X = np.array([date_numeric, consumption]).T

kmeans = KMeans(n_clusters=3)
kmeans.fit(X)

centroids = kmeans.cluster_centers_
labels = kmeans.labels_

print(centroids)
print(labels)

fig, ax = plt.subplots(figsize=(10, 8))
rect = fig.patch
rect.set_facecolor('#2d2b2b')

# One matplotlib format string per cluster label.
colors = ["b.", "r.", "g."]

for (day_code, watts), label in zip(X, labels):
    # inverse_transform expects a 1-D array of codes, not a scalar.
    day = encoder.inverse_transform(np.array([day_code]).astype(int))[0]
    print("coordinate:", day, watts, "label:", label)
    ax.plot(day_code, watts, colors[label], markersize=10)

ax.scatter(centroids[:, 0], centroids[:, 1],
           marker="x", s=150, linewidths=5, zorder=10)

# Label every fifth day on the x axis with its date string.
a = np.arange(0, len(X), 5)
ax.set_xticks(a)
ax.set_xticklabels(encoder.inverse_transform(a.astype(int)))
ax.tick_params(axis='x', colors='lightseagreen')
ax.tick_params(axis='y', colors='lightseagreen')
plt.scatter(centroids[:, 0], centroids[:, 1],
            marker="x", s=100, c="black", linewidths=5, zorder=10)
ax.set_title('energy consumptions clusters (high/medium/low)', color='gold')
ax.set_xlabel('time', color='gold')
ax.set_ylabel('date(year 2011)', color='gold')

plt.show()

By "spheres" I mean the shaded area surrounding each cluster in the plot, as in this picture.

enter image description here

I tried to google it.

But when I type "matplotlib spheres", I do not get any useful results.

The sample graph in your post looks like the result of a generalized Gaussian mixture, where each sphere is a 2-D Gaussian density.

I'll write some sample code shortly to demonstrate how to use a GMM on your dataset and how to produce this kind of plot.

# Cluster daily energy consumption with a Gaussian mixture model and draw a
# translucent ellipse ("sphere") behind each cluster.
#
# Reads the CSV (first column: timestamps, second column: readings), sums the
# readings per day, fits a three-component Gaussian mixture on
# (day index, daily consumption) and plots, per component, the ellipse, the
# member points and the component mean.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
from matplotlib.patches import Ellipse
import pandas as pd
# code changes here
# ===========================================
# sklearn.mixture.GMM was removed from scikit-learn; GaussianMixture is its
# replacement.
from sklearn.mixture import GaussianMixture
# ===========================================
from sklearn.preprocessing import LabelEncoder

style.use('ggplot')

# replace this with your own file path
my_file = '/home/jian/downloads/total_watt.csv'

df = pd.read_csv(my_file, parse_dates=[0], index_col=[0])
# `resample(..., how='sum')` no longer exists in pandas; call the aggregation
# on the resampler instead. min_count=1 leaves days without any reading as
# NaN (instead of 0) so that the dropna() below still removes them.
df = df.resample('1D').sum(min_count=1)
df = df.dropna()

# NOTE(review): '%y' is the two-digit year; the original post may have used
# '%Y' -- confirm which tick-label format is wanted.
date = [stamp.strftime('%y-%m-%d') for stamp in df.index.tolist()]

# Encode each date string as an integer so it can serve as a numeric feature.
encoder = LabelEncoder()
date_numeric = encoder.fit_transform(date)
consumption = df[df.columns[0]].values

# One row per day: [encoded date, total consumption].
X = np.array([date_numeric, consumption]).T

# code changes here
# ===========================================
# covariance_type='diag' gives one variance per feature and component, which
# is exactly what the axis-aligned ellipses below visualise.
gmm = GaussianMixture(n_components=3, covariance_type='diag', random_state=0)
gmm.fit(X)
y_pred = gmm.predict(X)

# the center of each cluster is given by its mean, gmm.means_
# ===========================================

fig, ax = plt.subplots(figsize=(10, 8))

for i, color in enumerate('rgb'):
    # sphere background: full axis lengths of 2 * 1.96 standard deviations,
    # i.e. roughly the central 95% range along each axis
    width, height = 2 * 1.96 * np.sqrt(gmm.covariances_[i])
    ell = Ellipse(xy=gmm.means_[i], width=width, height=height, color=color)
    ell.set_alpha(0.1)
    ax.add_artist(ell)
    # data points assigned to this component
    members = X[y_pred == i]
    ax.scatter(members[:, 0], members[:, 1], color=color)
    # center
    ax.scatter(gmm.means_[i][0], gmm.means_[i][1],
               marker='x', s=100, c=color)

ax.set_title('energy consumptions clusters (high/medium/low)', color='gold')
ax.set_xlabel('time', color='gold')
ax.set_ylabel('date(year 2011)', color='gold')

# Label every fifth day on the x axis with its date string.
a = np.arange(0, len(X), 5)
ax.set_xticks(a)
ax.set_xticklabels(encoder.inverse_transform(a.astype(int)))
ax.tick_params(axis='x', colors='lightseagreen')
ax.tick_params(axis='y', colors='lightseagreen')

plt.show()

enter image description here


Comments