Source code for generalizit.g_theory_utils

import pandas as pd
from typing import Dict, Union, List, Tuple, Any, Optional
import itertools
from itertools import combinations

def create_pseudo_df(d_study: Dict[str, int], variance_tup_dict: Dict[str, tuple]) -> pd.DataFrame:
    """
    Create a pseudo DataFrame with all possible combinations of facet levels.

    Every facet in ``d_study`` is combined with every other facet: the result
    holds one row per element of the Cartesian product of the level ranges
    ``1..n`` of all facets. The nesting information in ``variance_tup_dict``
    only influences the order in which the rows are generated, not which rows
    exist.

    Parameters:
        d_study (dict): A dictionary representing the study design with facets as keys
            and the number of levels for each facet as values. Values must be
            integers; ``range`` is applied to them directly, so lists of
            per-parent level counts are not supported by this function.
        variance_tup_dict (dict): A dictionary mapping variance component names to
            tuples containing the component facets. Names that contain ``":"`` and
            no ``" x "`` are read as nesting chains, e.g. ``"i:p"`` with the tuple
            ``("i", "p")`` means ``i`` is nested within ``p``.

    Returns:
        pd.DataFrame: One column per facet (in ``d_study`` key order) and one row
            per combination of levels. Levels are numbered from 1. Rows are
            generated with parents placed before their nested children and that
            order then reversed, so the most deeply nested facet varies slowest
            and the first top-level facet varies fastest.
    """
    # Map every nested facet to the facets it is nested within.
    nested_vars: Dict[str, list] = {}
    for comp_name, components in variance_tup_dict.items():
        if ":" not in comp_name or " x " in comp_name:
            # Main effects and crossed interactions carry no nesting information.
            continue
        chain = list(components)
        # In ("i", "p", "h") each facet is nested within the one that follows it.
        for child, parent in zip(chain, chain[1:]):
            nested_vars.setdefault(child, []).append(parent)

    # Use the key order of d_study (not a set) so that column order and row
    # order are reproducible from run to run.
    facets = list(d_study)

    # Top-level facets first, then nested facets once all of their parents
    # have been placed.
    ordered_facets = [facet for facet in facets if facet not in nested_vars]
    waiting = [facet for facet in facets if facet in nested_vars]
    while waiting:
        deferred = []
        for facet in waiting:
            if all(parent in ordered_facets for parent in nested_vars[facet]):
                ordered_facets.append(facet)
            else:
                deferred.append(facet)
        if len(deferred) == len(waiting):
            # A full pass placed nothing: the parents are either missing from
            # d_study or form a cycle. Append the rest as-is instead of
            # looping forever.
            ordered_facets.extend(deferred)
            break
        waiting = deferred

    # The first facet in this list is the outermost loop (varies slowest).
    ordered_facets.reverse()

    # itertools.product advances the last iterable fastest, which matches a
    # stack of nested for-loops over ordered_facets.
    level_ranges = [range(1, d_study[facet] + 1) for facet in ordered_facets]
    rows = list(itertools.product(*level_ranges))

    position = {facet: idx for idx, facet in enumerate(ordered_facets)}
    data = {facet: [row[position[facet]] for row in rows] for facet in facets}

    return pd.DataFrame(data)
def adjust_for_fixed_effects(
        variance_tup_dict: Dict[str, tuple],
        variance_df: pd.DataFrame,
        levels_df: pd.DataFrame,
        fixed_facets: Optional[List[str]],
        verbose: bool = False
) -> Tuple[Dict[str, tuple], pd.DataFrame]:
    """
    Adjust variance components for fixed facets in any design.

    Follows Brennan's rule 4.3.1: For every alpha, absorb any variance component
    with alpha and fixed facet into the lower-order component.

    Concretely, for every component ``alpha`` that involves no fixed facet, each
    component whose facets are exactly ``alpha`` plus a non-empty subset of the
    fixed facets is multiplied by ``levels_df.at[alpha, component]`` and added to
    ``alpha``; the absorbed component is then removed. Components that consist
    only of fixed facets (main effects and their interactions) contain no
    ``alpha`` and are removed without being absorbed anywhere.

    Args:
        variance_tup_dict: dict mapping component names to tuples of facets.
        variance_df: DataFrame with index as variance component names and a 'Variance' column.
            A row named 'mean', if present, is ignored.
        levels_df: DataFrame of levels coefficients (1/levels), addressed as
            ``levels_df.at[absorbing_component, absorbed_component]``.
        fixed_facets: list of facets to fix (e.g., ['i']). ``None`` returns the
            inputs untouched.
        verbose: bool, if True, print debug information.

    Returns:
        adjusted_variance_tup_dict: dict mapping adjusted component names to tuples of facets.
        adjusted_variance_df: DataFrame with adjusted variance components
            (every retained random component is rounded to 4 decimals).
    """

    def vprint(*args, **kwargs):
        """Verbose print function."""
        if verbose:
            print(*args, **kwargs)

    if fixed_facets is None:
        vprint("No fixed facets provided. Returning original variance components.")
        return variance_tup_dict, variance_df

    fixed_set = set(fixed_facets)

    # Gather all unique facets (used for verbose output only)
    all_facets = set()
    for facets in variance_tup_dict.values():
        all_facets.update(facets)
    all_facets = list(all_facets)
    vprint(f"All facets: {all_facets}")

    # Identify random facets
    random_facets = [facet for facet in all_facets if facet not in fixed_set]
    vprint(f"Random facets: {random_facets}")

    # Prepare variance dict
    adjusted_variance_dict = variance_df['Variance'].to_dict()
    adjusted_variance_dict.pop('mean', None)
    vprint(f"Initial variance dict: {adjusted_variance_dict}")

    # Remove fixed facet main effects ...
    for fixed_facet in fixed_facets:
        adjusted_variance_dict.pop(fixed_facet, None)
    # ... and every other component made up solely of fixed facets (e.g.
    # "i x h" when both i and h are fixed). Such a component contains no
    # random alpha, so the rule gives it nothing to be absorbed into.
    adjusted_variance_dict = {
        key: value
        for key, value in adjusted_variance_dict.items()
        if not (variance_tup_dict[key] and fixed_set.issuperset(variance_tup_dict[key]))
    }
    vprint(f"Variance dict after removing fixed facets: {adjusted_variance_dict}")

    # Only keep keys not involving fixed facets
    key_list = [
        key for key in adjusted_variance_dict
        if fixed_set.isdisjoint(variance_tup_dict[key])
    ]
    vprint(f"Keys to keep: {key_list}")

    # Absorb higher-order interactions with fixed facets
    for variance in key_list:
        facets = tuple(variance_tup_dict[variance])
        initial_var = adjusted_variance_dict[variance]

        # Candidate interactions are alpha's own facets plus every non-empty
        # subset of the fixed facets. Combos that omit one of alpha's facets
        # are deliberately not generated: they describe components that do
        # not contain alpha.
        interaction_tuples = [
            facets + extra
            for size in range(1, len(fixed_facets) + 1)
            for extra in combinations(fixed_facets, size)
        ]
        vprint(f"Interactions for {variance}: {interaction_tuples}")

        for interaction in interaction_tuples:
            wanted = set(interaction)
            for key, tup in variance_tup_dict.items():
                # Facet order inside a tuple is irrelevant, hence the set
                # comparison. Skip anything missing or already absorbed.
                if set(tup) == wanted and adjusted_variance_dict.get(key) is not None:
                    vprint(f"Absorbing {key} into {variance}")
                    initial_var += adjusted_variance_dict[key] * levels_df.at[variance, key]
                    adjusted_variance_dict[key] = None  # Mark for removal
                    break
        adjusted_variance_dict[variance] = round(initial_var, 4)

    # Remove absorbed keys
    adjusted_variance_dict = {k: v for k, v in adjusted_variance_dict.items() if v is not None}
    vprint(f"Final adjusted variance dict: {adjusted_variance_dict}")

    # Build adjusted tuple dict and DataFrame
    adjusted_variance_tup_dict = {k: variance_tup_dict[k] for k in adjusted_variance_dict}
    adjusted_variance_df = pd.DataFrame.from_dict(
        adjusted_variance_dict, orient='index', columns=['Variance']
    )

    return adjusted_variance_tup_dict, adjusted_variance_df