Source code for numpyradiomics.mod_glcm

import numpy as np

def glcm(image, mask, binWidth=25, distances=[1], symmetricalGLCM=True, weightingNorm=None):
    """
    Compute 24 Pyradiomics-style GLCM (Gray Level Co-occurrence Matrix) features.

    The GLCM describes the second-order joint probability function of an image region,
    constrained by the mask. It counts how often pairs of voxels with specific gray levels
    and specific spatial relationships occur.

    Args:
        image (np.ndarray): 3D image array containing voxel intensities.
        mask (np.ndarray): 3D mask array (same shape as image), where non-zero values
            indicate the ROI.
        binWidth (float, optional): Width of bins for discretization. Default is 25.
        distances (list, optional): List of integer distances (offsets) to compute GLCMs
            for. Default is [1].
        symmetricalGLCM (bool, optional): If True, counts co-occurrences in both directions
            (i->j and j->i). If False, only counts i->j. Default is True.
        weightingNorm (str, optional): Method to weight GLCMs if multiple distances are
            used. Options: None, 'manhattan', 'euclidean', 'infinity'. Default is None.

    Returns:
        dict: Dictionary containing the 24 GLCM features:

            - **Autocorrelation**: Magnitude of the fineness and coarseness of texture.
            - **ClusterProminence**: Asymmetry of the GLCM.
            - **ClusterShade**: Skewness of the matrix.
            - **ClusterTendency**: Grouping of voxels with similar gray-level values.
            - **Contrast**: Local intensity variation (favors contributions away from the diagonal).
            - **Correlation**: Linear dependency of gray levels to their neighbors.
            - **DifferenceAverage**: Mean of the difference distribution.
            - **DifferenceEntropy**: Randomness/variability in neighborhood intensity differences.
            - **DifferenceVariance**: Variance of the difference distribution.
            - **JointAverage**: Mean gray level intensity of the $i$ distribution.
            - **JointEnergy**: Homogeneity of the texture (sum of squared elements).
            - **JointEntropy**: Randomness/variability in neighborhood intensity values.
            - **Id**: Inverse Difference (homogeneity 1).
            - **Idm**: Inverse Difference Moment (homogeneity 2).
            - **Idmn**: Normalized Inverse Difference Moment.
            - **Idn**: Normalized Inverse Difference.
            - **Imc1**: Informational Measure of Correlation 1.
            - **Imc2**: Informational Measure of Correlation 2.
            - **InverseVariance**: Inverse variance of the difference distribution.
            - **MCC**: Maximal Correlation Coefficient.
            - **MaximumProbability**: Occurrence of the most predominant pair of neighboring intensity values.
            - **SumAverage**: Mean of the sum distribution.
            - **SumEntropy**: Sum of entropies.
            - **SumSquares**: Variance of the distribution (Cluster Tendency is a specific case of this).

    Example:
        >>> import numpyradiomics as npr
        >>> # Generate a noisy ellipsoid
        >>> img, mask = npr.dro.noisy_ellipsoid(radii_mm=(10, 10, 10), intensity_range=(0, 100))
        >>>
        >>> # Compute GLCM features (distance=1)
        >>> feats = npr.glcm(img, mask, binWidth=10, distances=[1])
        >>>
        >>> print(f"Contrast: {feats['Contrast']:.4f}")
        Contrast: 12.4501
    """
    if not np.any(mask > 0):
        raise ValueError("Mask contains no voxels")

    roi = image[mask > 0]
    Imin = roi.min()

    # --- FIX: Clamp discretization to avoid epsilon errors ---
    # If image[i] == Imin, (image - Imin) might be -1e-16, causing floor() to be -1.
    # This would map valid pixels to bin 0 (background), shifting the mean/autocorr.
    diff = image - Imin
    diff[mask > 0] = np.maximum(diff[mask > 0], 0)
    img_q = np.floor(diff / binWidth).astype(np.int32) + 1
    img_q[mask == 0] = 0

    levels = int(img_q.max())

    # --- FIX 2: Handle degenerate (single gray level) ROIs ---
    if levels <= 1:
        # Return standard PyRadiomics defaults for flat regions
        return {
            'Autocorrelation': 1.0, 'ClusterProminence': 0.0, 'ClusterShade': 0.0,
            'ClusterTendency': 0.0, 'Contrast': 0.0, 'Correlation': 1.0,
            'DifferenceAverage': 0.0, 'DifferenceEntropy': 0.0, 'DifferenceVariance': 0.0,
            'Id': 1.0, 'Idm': 1.0, 'Idmn': 1.0, 'Idn': 1.0,
            'Imc1': 0.0, 'Imc2': 0.0, 'InverseVariance': 0.0,
            'JointAverage': 1.0, 'JointEnergy': 1.0, 'JointEntropy': 0.0,
            'MCC': 1.0, 'MaximumProbability': 1.0,
            'SumAverage': 2.0, 'SumEntropy': 0.0, 'SumSquares': 0.0
        }

    # Step 2: Generate angle (direction) vectors
    dims = image.ndim
    if dims == 3:
        angles = [
            (0, 0, 1), (0, 1, 0), (1, 0, 0),
            (0, 1, 1), (0, 1, -1), (1, 0, 1), (1, 0, -1), (1, 1, 0), (1, -1, 0),
            (1, 1, 1), (1, 1, -1), (1, -1, 1), (1, -1, -1)
        ]
    else:
        angles = [(0, 1), (1, 1), (1, 0), (1, -1)]

    final_offsets = []
    d_seq = distances if isinstance(distances, list) else [distances]
    for d_list in d_seq:
        d_sub = d_list if isinstance(d_list, list) else [d_list]
        for d in d_sub:
            for a in angles:
                final_offsets.append(np.array(a) * d)

    # Step 3: Accumulate the co-occurrence matrix P over all offsets,
    # optionally weighted by the chosen distance norm.
    P_accum = np.zeros((levels, levels), dtype=np.float64)

    weights = np.ones(len(final_offsets))
    if weightingNorm == 'manhattan':
        weights = np.array([1.0 / np.sum(np.abs(o)) for o in final_offsets])
    elif weightingNorm == 'euclidean':
        weights = np.array([1.0 / np.linalg.norm(o) for o in final_offsets])
    elif weightingNorm == 'infinity':
        weights = np.array([1.0 / np.max(np.abs(o)) for o in final_offsets])
    weights /= np.sum(weights)

    for offset, w in zip(final_offsets, weights):
        P_accum += w * _glcm_offset(img_q, mask, offset, levels, symmetricalGLCM)

    if P_accum.sum() > 0:
        P_accum /= P_accum.sum()

    return _compute_features_from_matrix(P_accum)
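A minimal sanity-check sketch, not part of the module itself: it assumes the module is
importable as numpyradiomics.mod_glcm (per the page title) and uses a small synthetic random
volume instead of real data, checking only properties that follow directly from the code
above (24 named features and a normalized co-occurrence matrix).

    >>> import numpy as np
    >>> from numpyradiomics.mod_glcm import glcm
    >>> rng = np.random.default_rng(0)
    >>> img = rng.integers(0, 100, size=(10, 10, 10)).astype(np.float64)
    >>> msk = np.zeros(img.shape, dtype=np.uint8)
    >>> msk[3:7, 3:7, 3:7] = 1
    >>> feats = glcm(img, msk, binWidth=25, distances=[1])
    >>> len(feats)
    24
    >>> sorted(feats)[:3]
    ['Autocorrelation', 'ClusterProminence', 'ClusterShade']
    >>> print(0.0 <= feats['MaximumProbability'] <= 1.0)
    True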
def _glcm_offset(img, mask, offset, levels, symmetric):
    slices_src = []
    slices_dst = []
    for shift in offset:
        if shift > 0:
            slices_src.append(slice(0, -shift))
            slices_dst.append(slice(shift, None))
        elif shift < 0:
            slices_src.append(slice(-shift, None))
            slices_dst.append(slice(0, shift))
        else:
            slices_src.append(slice(None))
            slices_dst.append(slice(None))

    src_vals = img[tuple(slices_src)]
    dst_vals = img[tuple(slices_dst)]

    valid = (src_vals > 0) & (dst_vals > 0)
    rows = src_vals[valid] - 1
    cols = dst_vals[valid] - 1

    P = np.zeros((levels, levels), dtype=np.float64)
    np.add.at(P, (rows, cols), 1)

    if symmetric:
        P += P.T

    return P


def _compute_features_from_matrix(P):
    Ng = P.shape[0]
    eps = 2e-16

    # Grid indices (1-based)
    i, j = np.indices((Ng, Ng)) + 1

    # Marginal probabilities
    px = np.sum(P, axis=1)  # Row sums
    py = np.sum(P, axis=0)  # Column sums

    # --- FIX 1: Correct mean calculation ---
    # Use a 1D index vector for the means: broadcasting the (Ng, Ng) grid 'i' against
    # the (Ng,) marginal px would align px with the column axis, which is the wrong
    # axis for the row marginal and silently produces incorrect sums.
    k_values = np.arange(1, Ng + 1)
    ux = np.sum(k_values * px)
    uy = np.sum(k_values * py)

    # Variances, computed with the same 1D index vector
    sigx2 = np.sum(((k_values - ux) ** 2) * px)
    sigy2 = np.sum(((k_values - uy) ** 2) * py)
    sigx = np.sqrt(sigx2)
    sigy = np.sqrt(sigy2)

    # The remaining features use the 2D grids 'i' and 'j', where broadcasting is correct.
    autocorr = np.sum(i * j * P)
    joint_avg = ux

    cluster_prom = np.sum(((i + j - ux - uy) ** 4) * P)
    cluster_shade = np.sum(((i + j - ux - uy) ** 3) * P)
    cluster_tend = np.sum(((i + j - ux - uy) ** 2) * P)

    contrast = np.sum(((i - j) ** 2) * P)

    if sigx * sigy == 0:
        correlation = 1.0
    else:
        correlation = np.sum((i - ux) * (j - uy) * P) / (sigx * sigy)

    # Difference distribution p_(x-y)(k), k = |i - j|
    k_vals_diff = np.abs(i - j)
    px_minus_y = np.bincount(k_vals_diff.ravel(), weights=P.ravel())
    k_indices = np.arange(len(px_minus_y))

    diff_avg = np.sum(k_indices * px_minus_y)
    diff_ent = -np.sum(px_minus_y * np.log2(px_minus_y + eps))
    diff_var = np.sum(((k_indices - diff_avg) ** 2) * px_minus_y)

    joint_energy = np.sum(P ** 2)
    joint_ent = -np.sum(P * np.log2(P + eps))

    i_minus_j = np.abs(i - j)
    i_minus_j_sq = i_minus_j ** 2

    Id = np.sum(P / (1 + i_minus_j))
    Idm = np.sum(P / (1 + i_minus_j_sq))

    # Safe marginals (zeros replaced by 1), used for MCC and reused for the IMC features below
    px_safe = px.copy()
    px_safe[px == 0] = 1
    py_safe = py.copy()
    py_safe[py == 0] = 1

    # MCC (Maximal Correlation Coefficient)
    try:
        # Q(a, b) = sum_k P(a, k) * P(b, k) / (px(a) * py(k))
        Q = (P / px_safe[:, None]) @ (P / py_safe[None, :]).T
        eigenvalues = np.linalg.eigvals(Q)
        # Sort by magnitude; slight complex noise can appear numerically.
        eigenvalues = np.sort(np.abs(eigenvalues))
        # The largest eigenvalue is approximately 1.0 (the trivial solution);
        # MCC is the square root of the second largest.
        if len(eigenvalues) >= 2:
            second_largest = eigenvalues[-2]
            mcc = np.sqrt(second_largest) if second_largest >= 0 else 0.0
        else:
            mcc = 0.0
    except Exception:
        mcc = 0.0

    Idmn = np.sum(P / (1 + (i_minus_j_sq / (Ng ** 2))))
    Idn = np.sum(P / (1 + (i_minus_j / Ng)))

    mask_neq = (i != j)
    inv_var = np.sum(P[mask_neq] / i_minus_j_sq[mask_neq]) if np.any(mask_neq) else 0.0

    max_prob = np.max(P)

    # Sum distribution p_(x+y)(k), k = i + j.
    k_vals_sum = i + j
    # The minimum sum is 2 (1 + 1), so bincount index 0 corresponds to sum = 2.
    px_plus_y = np.bincount(k_vals_sum.ravel() - 2, weights=P.ravel())
    k_indices_sum = np.arange(len(px_plus_y)) + 2

    sum_avg = np.sum(k_indices_sum * px_plus_y)
    sum_ent = -np.sum(px_plus_y * np.log2(px_plus_y + eps))

    # Sum of Squares (variance relative to the mean), again using the 1D index vector
    sum_squares = np.sum(((k_values - ux) ** 2) * px)

    hx = -np.sum(px * np.log2(px + eps))
    hy = -np.sum(py * np.log2(py + eps))
    hxy = joint_ent
    p_log_px_py = np.log2(px_safe[i - 1] * py_safe[j - 1] + eps)
    hxy1 = -np.sum(P * p_log_px_py)
    px_py = np.outer(px, py)
    hxy2 = -np.sum(px_py * np.log2(px_py + eps))

    imc1 = (hxy - hxy1) / max(hx, hy) if max(hx, hy) != 0 else 0
    imc2_term = np.exp(-2 * (hxy2 - hxy))
    imc2 = np.sqrt(1 - imc2_term) if imc2_term <= 1 else 0

    return {
        'Autocorrelation': autocorr,
        'ClusterProminence': cluster_prom,
        'ClusterShade': cluster_shade,
        'ClusterTendency': cluster_tend,
        'Contrast': contrast,
        'Correlation': correlation,
        'DifferenceAverage': diff_avg,
        'DifferenceEntropy': diff_ent,
        'DifferenceVariance': diff_var,
        'Id': Id,
        'Idm': Idm,
        'Idmn': Idmn,
        'Idn': Idn,
        'Imc1': imc1,
        'Imc2': imc2,
        'InverseVariance': inv_var,
        'JointAverage': joint_avg,
        'JointEnergy': joint_energy,
        'JointEntropy': joint_ent,
        'MCC': mcc,
        'MaximumProbability': max_prob,
        'SumAverage': sum_avg,
        'SumEntropy': sum_ent,
        'SumSquares': sum_squares
    }
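As a second illustrative sketch (again not part of the module), the private helper
_compute_features_from_matrix can be exercised directly on a hand-built 2x2 diagonal matrix,
for which Contrast, Correlation and JointEnergy are easy to verify by hand: only diagonal
pairs occur, so Contrast is 0, Correlation is 1, and JointEnergy is 2 * 0.25 = 0.5.

    >>> import numpy as np
    >>> from numpyradiomics.mod_glcm import _compute_features_from_matrix
    >>> P = np.eye(2) / 2.0   # two gray levels, perfectly correlated pairs
    >>> f = _compute_features_from_matrix(P)
    >>> print(f"{f['Contrast']:.4f} {f['Correlation']:.4f} {f['JointEnergy']:.4f}")
    0.0000 1.0000 0.5000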
def glcm_units(intensity_unit='HU'):
    """
    Returns units for GLCM features.

    Args:
        intensity_unit (str, optional): The unit of pixel intensity (e.g., 'HU', 'GV', '').
            Default is 'HU'.

    Returns:
        dict: Dictionary mapping feature names to their units.

    Example:
        >>> from numpyradiomics import glcm_units
        >>> units = glcm_units(intensity_unit='HU')
        >>> print(units['Contrast'])
        HU^2
    """
    base_unit = intensity_unit
    return {
        "Autocorrelation": f"{base_unit}^2",     # sum(i * j * P) -> I * I * 1
        "ClusterProminence": f"{base_unit}^4",   # sum((i+j-u)^4 * P) -> I^4
        "ClusterShade": f"{base_unit}^3",        # sum((i+j-u)^3 * P) -> I^3
        "ClusterTendency": f"{base_unit}^2",     # sum((i+j-u)^2 * P) -> I^2
        "Contrast": f"{base_unit}^2",            # sum((i-j)^2 * P) -> I^2
        "Correlation": "",                       # covariance / (std * std) -> I^2 / I^2 = 1
        "DifferenceAverage": base_unit,          # sum(k * P_diff) -> I * 1
        "DifferenceEntropy": "",                 # entropy (bits)
        "DifferenceVariance": f"{base_unit}^2",  # variance of difference -> I^2
        "Id": "",                                # inverse difference (1 / (1 + |i-j|)) -> dimensionless
        "Idm": "",                               # inverse difference moment (1 / (1 + (i-j)^2)) -> dimensionless
        "Idmn": "",                              # normalized -> dimensionless
        "Idn": "",                               # normalized -> dimensionless
        "Imc1": "",                              # correlation measure -> dimensionless
        "Imc2": "",                              # correlation measure -> dimensionless
        "InverseVariance": f"{base_unit}^-2",    # sum(P / |i-j|^2) -> 1 / I^2
        "JointAverage": base_unit,               # mean intensity -> I
        "JointEnergy": "",                       # sum(P^2) -> dimensionless
        "JointEntropy": "",                      # entropy (bits)
        "MCC": "",                               # correlation coefficient -> dimensionless
        "MaximumProbability": "",                # probability value -> dimensionless
        "SumAverage": base_unit,                 # sum(k * P_sum) -> I * 1
        "SumEntropy": "",                        # entropy (bits)
        "SumSquares": f"{base_unit}^2",          # variance from mean -> I^2
    }
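A brief illustrative check, assuming the same module path as above: the unit dictionary has
the same 24 keys as the feature dictionary returned by glcm(), which is convenient when
exporting labelled feature tables.

    >>> from numpyradiomics.mod_glcm import glcm_units
    >>> units = glcm_units(intensity_unit='HU')
    >>> len(units)
    24
    >>> units['SumAverage'], units['InverseVariance']
    ('HU', 'HU^-2')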