"""Source code for ``generalizit.generalizit``: the high-level GeneralizIT API."""

import re
from typing import Tuple, Optional, Dict, List
import numpy as np
import pandas as pd
from generalizit.design import Design
from generalizit.design_utils import parse_facets, match_research_design, validate_research_design, get_facets_from_variance_tuple_dictionary
import warnings

class GeneralizIT:
    """
    High-level API for Generalizability Theory analysis.

    GeneralizIT provides a user-friendly interface for conducting
    Generalizability Theory (G-Theory) analyses, including variance component
    estimation, reliability coefficient calculation, Decision studies
    (D-studies), and confidence interval estimation.

    This class serves as a wrapper around the core analytical engine
    (Design class), handling data preparation, research design interpretation,
    and result presentation.

    Parameters:
        data (pd.DataFrame): Dataset containing facet variables and response
            measurements.
        design_str (str): String specifying the research design in standard
            notation (e.g., "p x i" for persons crossed with items). Only
            parsed when ``variance_tuple_dictionary`` is not supplied.
        response (str): Column name in data containing the response/measurement
            values.
        variance_tuple_dictionary (dict, optional): Pre-built mapping with
            string keys and tuple values (tuples of strings per the type
            hint). When given, it is validated and used instead of parsing
            ``design_str``.

    Attributes:
        design (Design): The underlying Design object that performs
            calculations.

    Raises:
        ValueError: If ``variance_tuple_dictionary`` is supplied but is not a
            dict of ``str -> tuple``; any ValueError raised while validating
            the parsed research design also propagates.

    Example:
        >>> import pandas as pd
        >>> from generalizit import GeneralizIT
        >>> data = pd.read_csv("my_data.csv")
        >>> gt = GeneralizIT(data, "p x i", "score")
        >>> gt.calculate_anova()
        >>> gt.calculate_g_coefficients()
        >>> gt.g_coefficients_summary()
    """

    # Single source of truth for the "run ANOVA first" error so every guard
    # in the class reports the same message.
    _ANOVA_REQUIRED_MSG = (
        "ANOVA table must be calculated first. "
        "Please run the calculate_anova() method."
    )

    def __init__(
        self,
        data: pd.DataFrame,
        design_str: str,
        response: str,
        variance_tuple_dictionary: Optional[Dict[str, Tuple[str, ...]]] = None,
    ):
        if variance_tuple_dictionary is None:
            # Derive the variance-component structure from the design string.
            design_num, facets = match_research_design(design_str)
            # Called directly: a ValueError raised here reaches the caller
            # unchanged instead of being re-wrapped in another ValueError.
            validate_research_design(design_num)
            variance_tuple_dictionary = parse_facets(
                design_num=design_num, design_facets=facets
            )
        else:
            # A caller-supplied dictionary bypasses design-string parsing, so
            # its basic shape is checked here.
            if not isinstance(variance_tuple_dictionary, dict):
                raise ValueError("variance_tuple_dictionary must be a dictionary.")
            if not all(
                isinstance(k, str) and isinstance(v, tuple)
                for k, v in variance_tuple_dictionary.items()
            ):
                raise ValueError(
                    "variance_tuple_dictionary must contain string keys and tuple values."
                )
            facets = get_facets_from_variance_tuple_dictionary(variance_tuple_dictionary)

        data, missing_data = self._clean_data(data=data, facets=facets, response=response)

        # Hand the cleaned data to the engine that performs the calculations.
        self.design = Design(
            data=data,
            variance_tuple_dictionary=variance_tuple_dictionary,
            missing_data=missing_data,
            response_col=response,
        )

    def _clean_data(
        self, data: pd.DataFrame, facets: list[str], response: str
    ) -> Tuple[pd.DataFrame, bool]:
        """
        Normalize column names and prune the DataFrame to the facet and response columns.

        Every string column label except ``response`` is stripped, lower-cased
        and has internal whitespace runs collapsed to a single space. Columns
        that are then neither a facet nor the response are dropped (a notice
        is printed). The caller's DataFrame is not modified; new frames are
        returned by ``rename``/``drop``.

        Args:
            data (pd.DataFrame): The input DataFrame to be pruned.
            facets (list[str]): The list of facet column names.
            response (str): The response variable column name.

        Returns:
            pd.DataFrame: The pruned DataFrame containing only the columns
                specified in facets and the response variable.
            bool: Whether the pruned data contains any missing values.
        """
        # Build the whole rename mapping first and apply it once; renaming
        # column by column copies the frame on every iteration. Non-string
        # labels are left untouched (they have no .strip()).
        normalized_names = {
            col: re.sub(r"\s+", " ", col.strip().lower())
            for col in data.columns
            if isinstance(col, str) and col != response
        }
        data = data.rename(columns=normalized_names)

        # Columns to keep: all facets plus the response variable.
        variables = list(facets) + [response]
        drop_cols = [col for col in data.columns if col not in variables]

        if drop_cols:
            print("Warning: The following columns have been dropped from the data:")
            for col in drop_cols:
                print(col)

        data = data.drop(columns=drop_cols)

        missing_data = bool(data.isnull().values.any())
        if missing_data:
            print("Warning: Missing values detected in the data.")

        return data, missing_data

    # ----------------- Internal Guards -----------------
    @staticmethod
    def _is_missing_table(table) -> bool:
        """Return True when a result table is unset (None) or has no content (``.empty``)."""
        return table is None or table.empty

    def _require_anova(self) -> None:
        """Raise RuntimeError unless the Design object holds a non-empty ANOVA table."""
        if self._is_missing_table(self.design.anova_table):
            raise RuntimeError(self._ANOVA_REQUIRED_MSG)

    def _validate_fixed_facets(self, fixed_facets: Optional[List[str]]) -> None:
        """
        Validate a ``fixed_facets`` argument against the design; no-op for None.

        Raises:
            ValueError: If ``fixed_facets`` is not a list, has more entries
                than the longest variance tuple's length minus 2, or names a
                facet that does not occur in any variance tuple.
        """
        if fixed_facets is None:
            return
        if not isinstance(fixed_facets, list):
            raise ValueError("fixed_facets must be a list.")

        component_tuples = self.design.variance_tuple_dictionary.values()
        # The longest tuple is used as the measure of how many facets the
        # design has.
        max_tuple = max(component_tuples, key=len)
        if len(fixed_facets) > len(max_tuple) - 2:
            raise ValueError(
                "The number of fixed facets cannot exceed the number of facets in the design minus 2."
            )

        all_facets = set(f for t in component_tuples for f in t)
        for facet in fixed_facets:
            if facet not in all_facets:
                raise ValueError(
                    f"Fixed facet '{facet}' not found in the design facets: {all_facets}"
                )

        print("Warning: Fixed facets should only be used with balanced designs without missing data.")

    # ----------------- Calculation Methods -----------------
    def calculate_anova(self) -> None:
        """
        Calculate variance components using ANOVA.

        This method is a wrapper for the Design.calculate_anova() method,
        which implements Henderson's Method 1 to estimate variance components
        for each facet in the specified research design.

        Returns:
            None: Results are stored in the underlying Design object.

        Notes:
            This method must be called before calculating G-coefficients,
            confidence intervals, or D-study scenarios.
        """
        self.design.calculate_anova()

    def calculate_g_coefficients(self, fixed_facets: Optional[List[str]] = None, **kwargs):
        """
        Calculate generalizability coefficients.

        This method is a wrapper for the Design.g_coeffs() method, which
        computes phi-squared (Φ²) and rho-squared (ρ²) coefficients for each
        potential object of measurement in the design.

        Parameters:
            fixed_facets (Optional[List[str]]): List of facets to be treated
                as fixed.
            **kwargs: Optional keyword arguments passed to Design.g_coeffs().
                - error_variance (bool): If True, prints detailed information
                  about error variances during calculation. Default is False.
                - Other parameters as documented in Design.g_coeffs().

        Returns:
            None: Results are stored in the underlying Design object.

        Raises:
            RuntimeError: If ANOVA table hasn't been calculated first.
            ValueError: If ``fixed_facets`` is not a list, is too long for
                the design, or names an unknown facet.
        """
        self._require_anova()
        self._validate_fixed_facets(fixed_facets)
        self.design.g_coeffs(fixed_facets=fixed_facets, **kwargs)

    def g_coeffs(self, **kwargs):
        """
        [DEPRECATED] Calculate generalizability coefficients.

        This method is deprecated and will be removed in a future version.
        Please use `calculate_g_coefficients()` instead.

        See `calculate_g_coefficients()` for parameter details.
        """
        warnings.warn(
            "The g_coeffs() method is deprecated and will be removed in a future version. "
            "Please use calculate_g_coefficients() instead.",
            DeprecationWarning,
            stacklevel=2,  # attribute the warning to the caller, not this wrapper
        )
        return self.calculate_g_coefficients(**kwargs)

    def calculate_d_study(
        self,
        d_study_design: Optional[dict] = None,
        fixed_facets: Optional[List[str]] = None,
        **kwargs,
    ):
        """
        Calculate G-coefficients for alternative research designs (D-Study).

        This method is a wrapper for the Design.calculate_d_study() method,
        which examines multiple possible study designs by generating
        combinations of provided facet levels.

        Parameters:
            d_study_design (dict, optional): Dictionary where keys are facet
                names and values are lists of integers representing different
                numbers of levels to test.
            fixed_facets (Optional[List[str]]): List of facets to be treated
                as fixed.
            **kwargs: Optional keyword arguments passed to
                Design.calculate_d_study().

        Returns:
            None: Results are stored in the underlying Design object.

        Raises:
            RuntimeError: If ANOVA table hasn't been calculated first.
            ValueError: If ``fixed_facets`` is not a list, is too long for
                the design, or names an unknown facet.
        """
        self._require_anova()
        self._validate_fixed_facets(fixed_facets)
        self.design.calculate_d_study(
            d_study_design=d_study_design, fixed_facets=fixed_facets, **kwargs
        )

    def calculate_confidence_intervals(self, alpha: float = 0.05, **kwargs):
        """
        Calculate confidence intervals for facet level means.

        This method is a wrapper for the
        Design.calculate_confidence_intervals() method, which computes
        confidence intervals for individual facets based on variance
        component analysis.

        Parameters:
            alpha (float, optional): Significance level for confidence
                intervals. Default is 0.05 (producing 95% confidence
                intervals).
            **kwargs: Optional keyword arguments passed to
                Design.calculate_confidence_intervals().

        Returns:
            None: Results are stored in the underlying Design object.

        Raises:
            RuntimeError: If ANOVA table hasn't been calculated first.
        """
        self._require_anova()
        self.design.calculate_confidence_intervals(alpha=alpha, **kwargs)

    # ----------------- Summary Methods -----------------
    def anova_summary(self) -> None:
        """
        Print the ANOVA table via Design.anova_summary().

        Raises:
            RuntimeError: If ANOVA table hasn't been calculated first.
        """
        self._require_anova()
        self.design.anova_summary()

    def variance_summary(self) -> None:
        """
        Print the variance components via Design.variance_summary().

        Raises:
            RuntimeError: If ANOVA table hasn't been calculated first.
        """
        self._require_anova()
        self.design.variance_summary()

    def g_coefficients_summary(self) -> None:
        """
        Print the G coefficients via Design.g_coeff_summary().

        Raises:
            RuntimeError: If the G coefficients haven't been calculated first.
        """
        if self._is_missing_table(self.design.g_coeffs_table):
            # Point at the supported method; g_coeffs() is deprecated.
            raise RuntimeError(
                "G coefficients must be calculated first. "
                "Please run the calculate_g_coefficients() method."
            )
        self.design.g_coeff_summary()

    def d_study_summary(self) -> None:
        """
        Print the D study results via Design.d_study_summary().

        Raises:
            RuntimeError: If the D study hasn't been calculated first.
        """
        if not self.design.d_study_dict:
            raise RuntimeError(
                "D study must be calculated first. Please run the calculate_d_study() method."
            )
        self.design.d_study_summary()

    def confidence_intervals_summary(self) -> None:
        """
        Print the confidence intervals via Design.confidence_intervals_summary().

        Raises:
            RuntimeError: If the confidence intervals haven't been calculated
                first.
        """
        # Truthiness, not ``is {}``: an identity test against a fresh dict
        # literal is always False, so the guard could never fire.
        if not self.design.confidence_intervals:
            raise RuntimeError(
                "Confidence intervals must be calculated first. "
                "Please run the calculate_confidence_intervals() method."
            )
        self.design.confidence_intervals_summary()
# ---- End of GeneralizIT Class ----

# ---- Wrapper Documentation ----
# GeneralizIT.calculate_anova.__doc__ = (
#     GeneralizIT.calculate_anova.__doc__ +
#     "\n\n" +
#     Design.anova_summary.__doc__
# )
# GeneralizIT.calculate_g_coefficients.__doc__ = (
#     GeneralizIT.calculate_g_coefficients.__doc__ +
#     "\n\n" +
#     Design.g_coeffs.__doc__
# )
# GeneralizIT.calculate_d_study.__doc__ = (
#     GeneralizIT.calculate_d_study.__doc__ +
#     "\n\n" +
#     Design.calculate_d_study.__doc__
# )
# GeneralizIT.calculate_confidence_intervals.__doc__ = (
#     GeneralizIT.calculate_confidence_intervals.__doc__ +
#     "\n\n" +
#     Design.calculate_confidence_intervals.__doc__
# )
# GeneralizIT.anova_summary.__doc__ = Design.anova_summary.__doc__
# GeneralizIT.variance_summary.__doc__ = Design.variance_summary.__doc__
# GeneralizIT.g_coefficients_summary.__doc__ = Design.g_coeff_summary.__doc__
# GeneralizIT.d_study_summary.__doc__ = Design.d_study_summary.__doc__
# GeneralizIT.confidence_intervals_summary.__doc__ = Design.confidence_intervals_summary.__doc__
# ---- End of Wrapper Documentation ----