Custom algorithm template
You can use the following custom algorithm template to help get you started with adding a custom algorithm to MLTK.
BaseAlgo class
from base import BaseAlgo
.
class CustomAlgoTemplate(BaseAlgo):
    """Skeleton of a custom algorithm: one stub per method you can fill in."""

    def __init__(self, options):
        """Check the options and perform any initialization."""
        # Placeholder: add option checking and initializations here.

    def fit(self, df, options):
        """Fit an estimator to df, a pandas DataFrame of the search results."""
        # Placeholder: nothing is fitted and nothing is returned.

    def partial_fit(self, df, options):
        """Incrementally fit a model."""
        # Placeholder: nothing is fitted and nothing is returned.

    def apply(self, df, options):
        """Apply a saved model.

        Modify df, a pandas DataFrame of the search results, and return it.
        The template performs no modification and returns df as received.
        """
        return df

    @staticmethod
    def register_codecs():
        """Add codecs to the codec manager."""
        # Placeholder: no codecs are registered by the template.
Using the BaseAlgo template in a search reflects the input data back to the search, as shown in the following example.
| fit CustomAlgoTemplate *
These methods are all described in detail in the BaseAlgo class in $SPLUNK_HOME/etc/apps/Splunk_ML_Toolkit/bin/base.py, as shown below.
Example
The following is an example of a custom Correlation Matrix:
from base import BaseAlgo


class CorrelationMatrix(BaseAlgo):
    """Compute and return a correlation matrix."""

    def __init__(self, options):
        """Validate the search options and save the settings on self.

        Args:
            options: dict of search options. The keys read here are
                'feature_variables', 'target_variable' and 'params'.

        Raises:
            RuntimeError: if no fields are supplied, if a target variable
                (the from clause) is supplied, if ``method`` is not one of
                'spearman', 'kendall' or 'pearson', or if any parameter
                other than ``method`` is supplied.
        """
        feature_variables = options.get('feature_variables', {})
        target_variable = options.get('target_variable', {})

        if not feature_variables:
            raise RuntimeError('You must supply one or more fields')

        if target_variable:
            raise RuntimeError('CorrelationMatrix does not support the from clause')

        # Keep the requested fields: fit() selects these columns from df.
        # NOTE(review): the MLTK framework may reassign this attribute after
        # construction (e.g. after expanding wildcards) -- confirm.
        self.feature_variables = feature_variables

        valid_methods = ['spearman', 'kendall', 'pearson']

        # Parameters given in the search; may be absent entirely.
        params = options.get('params', {})

        # Fall back to 'pearson' when the search does not specify a method.
        method = params.get('method', 'pearson')
        if method not in valid_methods:
            error_msg = 'Invalid value for method: must be one of {}'.format(
                ', '.join(valid_methods))
            raise RuntimeError(error_msg)

        # Assign method to self for later usage
        self.method = method

        # Reject unknown parameters whether or not method was supplied, so
        # that a mistyped parameter name is never silently ignored.
        if any(name != 'method' for name in params):
            raise RuntimeError('The only valid parameter is method.')

    def fit(self, df, options):
        """Compute the correlations and return a DataFrame.

        Args:
            df: pandas DataFrame of the search results. It contains all the
                search results, including hidden fields; only the columns
                named in self.feature_variables are used.
            options: search options; not used by this method.

        Returns:
            A DataFrame holding the pairwise correlations of the requested
            columns, with the row labels moved into a regular column.
        """
        requested_columns = df[self.feature_variables]

        # Get correlations
        correlations = requested_columns.corr(method=self.method)

        # Reset index so that all the data are in columns
        # (this is usually not necessary, but is for the corr method)
        output_df = correlations.reset_index()

        return output_df
Write a Python algorithm class | Running process and method calling conventions |
This documentation applies to the following versions of Splunk® Machine Learning Toolkit: 5.4.0, 5.4.1, 5.4.2
Feedback submitted, thanks!