Source code for sharkpy.predicting
#sharkpy/predict.py
import pandas as pd
import numpy as np
[docs]
def predict(self, X=None):
    """
    Make predictions using the trained model.

    Parameters:
        X: dict, DataFrame, or array-like, optional
            Input samples to predict. If None, the training features stored
            in ``self.features`` are used as-is. Any other value is first
            converted and validated by ``_validate_and_process_input``,
            which accepts at least a single dict
            ({'feature1': value1, 'feature2': value2}), a DataFrame with
            feature columns, or a numpy array of raw feature values.

    Returns:
        The single predicted value when the model yields exactly one
        prediction, otherwise the full sequence of predictions. When
        ``self.target_encoder`` is set, predictions are decoded with its
        ``inverse_transform`` before being returned.

    Raises:
        AttributeError: If ``self.model`` is missing or None (not trained).
        Exception: Any error raised while predicting is reported on stdout
            and then re-raised unchanged.
    """
    print("🦈 Sharky is analyzing the data!")

    # A missing attribute and an attribute that is still None both mean the
    # model has not been trained; hasattr() alone would let None through and
    # fail later with an unhelpful "'NoneType' has no attribute 'predict'".
    if getattr(self, 'model', None) is None:
        raise AttributeError("🦈 Oops! Sharky needs training first. Call learn() before predict().")

    if X is None:
        # No input given: fall back to the stored training features.
        print("🦈 No data provided! Using training data for prediction...")
        X = self.features
    else:
        X = _validate_and_process_input(self, X)

    try:
        predictions = self.model.predict(X)

        # Decode categorical predictions only when an encoder is really set.
        target_encoder = getattr(self, 'target_encoder', None)
        if target_encoder is not None:
            predictions = target_encoder.inverse_transform(predictions)

        # A lone prediction is unwrapped so callers get a scalar, not a
        # one-element sequence.
        if len(predictions) == 1:
            print(f"🦈 Prediction: {predictions[0]}")
            return predictions[0]

        print(f"🦈 Made {len(predictions)} predictions!")
        return predictions
    except Exception as e:
        # Report in the library's voice, then let the caller handle it.
        print(f"🦈 Uh-oh! Sharky encountered an error: {str(e)}")
        raise
[docs]
def predict_baseline(self):
    """
    Predict for a synthetic baseline sample built from the training columns.

    One value is chosen per column of ``self.features``:
    - a column that has an entry in ``self.encoders`` gets the first value
      stored in that column's encoder mapping;
    - every other column gets 0.

    NOTE(review): the log message says "minimum values", but non-encoded
    features are set to 0 rather than an observed minimum -- confirm intent.

    Returns:
        Whatever ``predict`` returns for that single-sample dict.
    """
    print("🦈 Making baseline prediction (all features at minimum values)...")

    # Encoded (categorical) columns take the first stored encoder value;
    # everything else is treated as numerical and pinned to 0.
    sample = {
        name: [*self.encoders[name].values()][0] if name in self.encoders else 0
        for name in self.features.columns
    }
    return predict(self, sample)
[docs]
def _validate_and_process_input(self, X):
"""Helper method to validate and process input data"""
# Convert input to DataFrame
if isinstance(X, dict):
X = pd.DataFrame([X])
elif isinstance(X, np.ndarray):
if not hasattr(self, 'feature_names'):
raise ValueError("🦈 Sharky doesn't know the feature names for numpy arrays!")
X = pd.DataFrame(X, columns=self.feature_names)
elif not isinstance(X, pd.DataFrame):
raise ValueError("🦈 Input must be a dictionary, DataFrame, or numpy array!")
# Validate features
missing_features = set(self.feature_names) - set(X.columns)
if missing_features:
raise ValueError(f"🦈 Missing features: {missing_features}")
# Process categorical features
X_processed = X.copy()
for col in X_processed.columns:
if col in self.encoders:
cat_to_code = {v: k for k, v in self.encoders[col].items()}
codes = set(cat_to_code.values())
mapped = X_processed[col].map(cat_to_code)
# If values are already numeric codes, keep them
already_codes_mask = X_processed[col].isin(codes)
combined = mapped.where(~mapped.isnull(), X_processed[col])
# Now flag only truly unseen values (neither label nor known code)
unseen_mask = mapped.isnull() & ~already_codes_mask
if unseen_mask.any():
unseen_values = X[col][unseen_mask].unique()
raise ValueError(f"🦈 Unseen categories in '{col}': {unseen_values}")
# Ensure integer codes
X_processed[col] = combined.astype(int)
return X_processed