Chapter 9. How to customize ML algorithms and fitting functions

To simplify the implementation of specific ML algorithms or fitting functions, CutiePie implements a factory for handling models and offers simple skeletons to fill in. Below is a look at the skeletons for ML algorithms and fitting functions, together with example implementations for K-Means clustering and a Gaussian fit.

Example 9-1. ML algorithm skeleton.


	  import io
	  import sys, os
	  sys.path.append(os.getcwd())
	  
	  import algo_factory
	  
	  # Skeleton algo parameters (defaults used by SkelAlgoBuilder)
	  # param_1 = 5
	  # param_2 = 'test'
	  # param_3 = 100
	  
	  class SkelAlgo:
	      """Skeleton of a user-defined ML algorithm.

	      The three parameters are placeholders: they are only stored on the
	      instance. Replace them, and the body of ``start``, with your own
	      algorithm.
	      """

	      def __init__(self, param_1, param_2, param_3):
	          self.param_1 = param_1
	          self.param_2 = param_2
	          self.param_3 = param_3

	      # Implementation of the algorithm; all arguments are mandatory even if not used.
	      def start(self, data, weigths, nclusters, axis, figure=None):
	          """Run the algorithm (placeholder: only prints a message).

	          The parameter name ``weigths`` (sic) is kept as spelled because it
	          is part of the method's signature.
	          """
	          print("Skeleton example. Implementation is all yours.")
	  
	  class SkelAlgoBuilder:
	      """Callable builder that creates one SkelAlgo and then reuses it."""

	      def __init__(self):
	          # No algorithm object exists until the builder is first called.
	          self._instance = None

	      def __call__(self, param_1=5, param_2='test', param_3=100, **_ignored):
	          """Return the cached SkelAlgo, creating it on the first call.

	          The parameters are only used for that first construction; later
	          calls return the same object whatever is passed. Extra keyword
	          arguments are accepted and ignored.
	          """
	          # Compare with None explicitly: a filled-in algorithm that happens
	          # to be falsy (e.g. defines __len__) must not be rebuilt each call.
	          if self._instance is None:
	              self._instance = SkelAlgo(param_1, param_2, param_3)
	          return self._instance
        

Example 9-2. Example of ML algorithm based on the skeleton. The output is shown in Figure 3-5.


	  ...
	  def start(self, data, weigths, nclusters, axis, figure=None):
	      """Cluster ``data`` with K-Means, mark the centers and print a summary.

	      The K-Means settings are read from attributes of ``self`` and the fit
	      uses ``weigths`` (sic) as per-sample weights. Each cluster center is
	      passed to ``self.addPoint`` together with ``axis``. The summary
	      reports, as a percentage of the total weight, how much weight belongs
	      to points whose value from ``self.soft_clustering_weights`` exceeds
	      0.90 and 0.95 (presumably a membership probability -- confirm against
	      that helper). ``figure`` is not used here.
	      """
	      # create kmeans object
	      # Keyword arguments keep every setting bound to the intended parameter:
	      # passed positionally, ``self.algorithm`` ends up in the ``copy_x`` slot
	      # of older scikit-learn signatures, and newer releases refuse positional
	      # arguments altogether.
	      # NOTE(review): ``precompute_distances`` was deprecated in scikit-learn
	      # 0.23 and removed in 1.0 -- drop it when targeting newer releases.
	      kmeans = KMeans(
	          n_clusters=nclusters,
	          init=self.init,
	          n_init=self.n_init,
	          max_iter=self.max_iter,
	          tol=self.tol,
	          precompute_distances=self.precompute_distances,
	          verbose=self.verbose,
	          random_state=self.random_state,
	          algorithm=self.algorithm,
	      )
	      # fit kmeans object to data
	      kmeans.fit(data, sample_weight=weigths)

	      cluster_center = kmeans.cluster_centers_
	      confidPerc = self.soft_clustering_weights(data, cluster_center)

	      # One entry per column of confidPerc, i.e. per cluster index.
	      prob_dict = {}
	      for i in range(len(confidPerc[0])):
	          prob_dict[i] = self.extract(confidPerc, i)

	      # CL 90%/CL 95%
	      bool_dict_90 = {}
	      bool_dict_95 = {}
	      w_90 = {}
	      w_95 = {}
	      sum90 = 0
	      sum95 = 0
	      for i in range(len(cluster_center)):
	          self.addPoint(axis, [cluster_center[i][0], cluster_center[i][1]])
	          # Masks selecting the points above each confidence threshold.
	          bool_dict_90[i] = [p > 0.9 for p in prob_dict[i]]
	          bool_dict_95[i] = [p > 0.95 for p in prob_dict[i]]
	          w_90[i] = list(compress(weigths, bool_dict_90[i]))
	          w_95[i] = list(compress(weigths, bool_dict_95[i]))
	          sum90 += sum(w_90[i])
	          sum95 += sum(w_95[i])

	      # The values are printed with a "%" sign, so scale the weight
	      # fractions by 100; report 0 when there is no weight at all.
	      total_weight = sum(weigths)
	      perc90 = 100.0 * sum90 / total_weight if total_weight else 0.0
	      perc95 = 100.0 * sum95 / total_weight if total_weight else 0.0

	      print("#########################################")
	      print("# Results of K-Mean clustering analysis #")
	      print("#########################################")
	      for i in range(len(cluster_center)):
	          print("Cluster", i," with center (x,y)=(",cluster_center[i][0],",",cluster_center[i][1],")")
	      print("Confidence Level 90% -->", perc90,"%")
	      print("Confidence Level 95% -->", perc95,"%")
	      print("#########################################")
	   ...
        

Example 9-3. Fitting function skeleton.


	  import io
	  import sys, os
	  sys.path.append(os.getcwd())

	  import pandas as pd
	  import numpy as np

	  
	  import fit_factory

	  # Skeleton fit parameters (defaults used by SkelFitBuilder)
	  # param_1 = 1
	  # param_2 = 2
	  # param_3 = 10

	  class SkelFit:
	      """Skeleton of a user-defined fitting function.

	      The three parameters are placeholders: they are only stored on the
	      instance. Replace them, and the body of ``start``, with your own fit.
	      """

	      def __init__(self, param_1, param_2, param_3):
	          self.param_1 = param_1
	          self.param_2 = param_2
	          self.param_3 = param_3

	      # Implementation of the algorithm; all arguments are mandatory even if not used.
	      def start(self, x, y, xmin, xmax, axis, fit_results):
	          """Run the fit (placeholder: only prints a message)."""
	          print("Skeleton example. Implementation is all yours.")

	  class SkelFitBuilder:
	      """Callable builder that creates one SkelFit and then reuses it."""

	      def __init__(self):
	          # No fit object exists until the builder is first called.
	          self._instance = None

	      def __call__(self, param_1=1, param_2=2, param_3=10, **_ignored):
	          """Return the cached SkelFit, creating it on the first call.

	          The parameters are only used for that first construction; later
	          calls return the same object whatever is passed. Extra keyword
	          arguments are accepted and ignored.
	          """
	          # Compare with None explicitly: a filled-in fit class that happens
	          # to be falsy (e.g. defines __len__) must not be rebuilt each call.
	          if self._instance is None:
	              self._instance = SkelFit(param_1, param_2, param_3)
	          return self._instance
        

Example 9-4. Example of fitting function based on the skeleton.


	  ...
	  # function defined by the user
	  def gauss(self, x, amplitude, mean, standard_deviation):
	      """Evaluate a Gaussian at ``x``.

	      Computes ``amplitude * exp(-(x - mean)**2 / (2 * standard_deviation**2))``.
	      ``x`` is handed to NumPy, so it may be a scalar or an array.
	      """
	      offset = x - mean
	      exponent = -(offset ** 2.0) / (2 * standard_deviation ** 2)
	      return amplitude * np.exp(exponent)

	  # implementation of the fitting algorithm
	  def start(self, x, y, xmin, xmax, axis, fit_results):
	      """Fit ``self.gauss`` to (x, y), draw the curve and report the parameters.

	      The initial guess is a fixed amplitude of 2000, the midpoint of
	      [xmin, xmax] as the mean and a tenth of that mean as the standard
	      deviation. One ``'Par[i]: value+/-error'`` string per fitted parameter
	      is appended to ``fit_results``, where ``error`` is the one-sigma
	      uncertainty taken from the covariance matrix.

	      Returns the line object drawn on ``axis``, or None when plotting or
	      reporting failed. Errors raised by ``curve_fit`` itself propagate to
	      the caller.
	      """
	      fitln = None
	      amplitude = 2000
	      mean = xmin + (xmax - xmin) / 2
	      standard_deviation = mean / 10
	      if standard_deviation == 0:
	          # A zero width makes gauss() divide by zero (range centred on 0);
	          # fall back to a tenth of the range, or 1.0 for an empty range.
	          standard_deviation = (xmax - xmin) / 10 or 1.0
	      p_init = [amplitude, mean, standard_deviation]
	      print(p_init)

	      popt, pcov = curve_fit(self.gauss, x, y, p0=p_init, maxfev=5000)

	      # plotting fit curve and printing results
	      try:
	          x_fit = np.linspace(x[0], x[-1], 10000)
	          y_fit = self.gauss(x_fit, *popt)

	          fitln, = axis.plot(x_fit, y_fit, 'r-')
	          # pcov is a covariance matrix: the one-sigma parameter errors are
	          # the square roots of its diagonal, not the diagonal itself.
	          perr = np.sqrt(np.diag(pcov))
	          for i, (value, error) in enumerate(zip(popt, perr)):
	              s = 'Par[' + str(i) + ']: ' + str(round(value, 3)) + '+/-' + str(round(error, 3))
	              fit_results.append(s)
	      except Exception as err:
	          # Plotting/reporting stays best effort, but say what went wrong
	          # instead of silently swallowing every error.
	          print("Could not plot or report the fit results:", err)
	      return fitln
	  ...