Source code for numpyradiomics.mod_glcm

import numpy as np

def glcm(image, mask, binWidth=25, distances=[1], symmetricalGLCM=True, weightingNorm=None):
    """
    Compute 24 Pyradiomics-style GLCM (Gray Level Co-occurrence Matrix) features.

    The GLCM describes the second-order joint probability function of an image region,
    constrained by the mask. It counts how often pairs of voxels with specific gray levels
    and specific spatial relationships occur.

    Args:
        image (np.ndarray): 3D image array containing voxel intensities.
        mask (np.ndarray): 3D mask array (same shape as image), where non-zero values
            indicate the ROI.
        binWidth (float, optional): Width of bins for discretization. Default is 25.
        distances (list, optional): List of integer distances (offsets) to compute GLCMs
            for. Default is [1].
        symmetricalGLCM (bool, optional): If True, counts co-occurrences in both directions
            (i->j and j->i). If False, only counts i->j. Default is True.
        weightingNorm (str, optional): Method to weight GLCMs if multiple distances are
            used. Options: None, 'manhattan', 'euclidean', 'infinity'. Default is None.

    Returns:
        dict: Dictionary containing the 24 GLCM features:

            - **Autocorrelation**: Magnitude of the fineness and coarseness of texture.
            - **ClusterProminence**: Asymmetry of the GLCM.
            - **ClusterShade**: Skewness of the matrix.
            - **ClusterTendency**: Grouping of voxels with similar gray-level values.
            - **Contrast**: Local intensity variation (favors contributions away from the diagonal).
            - **Correlation**: Linear dependency of gray levels to their neighbors.
            - **DifferenceAverage**: Mean of the difference distribution.
            - **DifferenceEntropy**: Randomness/variability in neighborhood intensity differences.
            - **DifferenceVariance**: Variance of the difference distribution.
            - **JointAverage**: Mean gray level intensity of the $i$ distribution.
            - **JointEnergy**: Homogeneity of the texture (sum of squared elements).
            - **JointEntropy**: Randomness/variability in neighborhood intensity values.
            - **Id**: Inverse Difference (homogeneity 1).
            - **Idm**: Inverse Difference Moment (homogeneity 2).
            - **Idmn**: Normalized Inverse Difference Moment.
            - **Idn**: Normalized Inverse Difference.
            - **Imc1**: Informational Measure of Correlation 1.
            - **Imc2**: Informational Measure of Correlation 2.
            - **InverseVariance**: Inverse variance of the difference distribution.
            - **MCC**: Maximal Correlation Coefficient.
            - **MaximumProbability**: Occurrence of the most predominant pair of neighboring intensity values.
            - **SumAverage**: Mean of the sum distribution.
            - **SumEntropy**: Sum of entropies.
            - **SumSquares**: Variance of the distribution (Cluster Tendency is a specific case of this).

    Example:
        >>> import numpyradiomics as npr
        >>> # Generate a noisy ellipsoid
        >>> img, mask = npr.dro.noisy_ellipsoid(radii_mm=(10, 10, 10), intensity_range=(0, 100))
        >>>
        >>> # Compute GLCM features (distance=1)
        >>> feats = npr.glcm(img, mask, binWidth=10, distances=[1])
        >>>
        >>> print(f"Contrast: {feats['Contrast']:.4f}")
        Contrast: 12.4501
    """
    if not np.any(mask > 0):
        raise ValueError("Mask contains no voxels")

    roi = image[mask > 0]
    Imin = roi.min()

    # --- FIX: Clamp discretization to avoid epsilon errors ---
    # If image[i] == Imin, (image - Imin) might be -1e-16, causing floor() to be -1.
    # This would map valid pixels to bin 0 (background), shifting the mean/autocorr.
    diff = image - Imin
    diff[mask > 0] = np.maximum(diff[mask > 0], 0)
    img_q = np.floor(diff / binWidth).astype(np.int32) + 1
    img_q[mask == 0] = 0

    levels = int(img_q.max())

    # --- FIX 2: Handle degenerate (single gray level) ROIs ---
    if levels <= 1:
        # Return standard PyRadiomics defaults for flat regions
        return {
            'Autocorrelation': 1.0, 'ClusterProminence': 0.0, 'ClusterShade': 0.0,
            'ClusterTendency': 0.0, 'Contrast': 0.0, 'Correlation': 1.0,
            'DifferenceAverage': 0.0, 'DifferenceEntropy': 0.0, 'DifferenceVariance': 0.0,
            'Id': 1.0, 'Idm': 1.0, 'Idmn': 1.0, 'Idn': 1.0,
            'Imc1': 0.0, 'Imc2': 0.0, 'InverseVariance': 0.0,
            'JointAverage': 1.0, 'JointEnergy': 1.0, 'JointEntropy': 0.0,
            'MCC': 1.0, 'MaximumProbability': 1.0,
            'SumAverage': 2.0, 'SumEntropy': 0.0, 'SumSquares': 0.0
        }

    # Step 2: Generate angle (direction) vectors
    dims = image.ndim
    if dims == 3:
        angles = [
            (0, 0, 1), (0, 1, 0), (1, 0, 0),
            (0, 1, 1), (0, 1, -1), (1, 0, 1), (1, 0, -1), (1, 1, 0), (1, -1, 0),
            (1, 1, 1), (1, 1, -1), (1, -1, 1), (1, -1, -1)
        ]
    else:
        angles = [(0, 1), (1, 1), (1, 0), (1, -1)]

    final_offsets = []
    d_seq = distances if isinstance(distances, list) else [distances]
    for d_list in d_seq:
        d_sub = d_list if isinstance(d_list, list) else [d_list]
        for d in d_sub:
            for a in angles:
                final_offsets.append(np.array(a) * d)

    # Step 3: Accumulate the co-occurrence matrix P over all offsets,
    # optionally weighted by the chosen distance norm.
    P_accum = np.zeros((levels, levels), dtype=np.float64)

    weights = np.ones(len(final_offsets))
    if weightingNorm == 'manhattan':
        weights = np.array([1.0 / np.sum(np.abs(o)) for o in final_offsets])
    elif weightingNorm == 'euclidean':
        weights = np.array([1.0 / np.linalg.norm(o) for o in final_offsets])
    elif weightingNorm == 'infinity':
        weights = np.array([1.0 / np.max(np.abs(o)) for o in final_offsets])
    weights /= np.sum(weights)

    for offset, w in zip(final_offsets, weights):
        P_accum += w * _glcm_offset(img_q, mask, offset, levels, symmetricalGLCM)

    if P_accum.sum() > 0:
        P_accum /= P_accum.sum()

    return _compute_features_from_matrix(P_accum)
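A minimal sanity-check sketch, not part of the module itself: it assumes the module is
importable as numpyradiomics.mod_glcm (per the page title) and uses a small synthetic random
volume instead of real data, checking only properties that follow directly from the code
above (24 named features and a normalized co-occurrence matrix).

    >>> import numpy as np
    >>> from numpyradiomics.mod_glcm import glcm
    >>> rng = np.random.default_rng(0)
    >>> img = rng.integers(0, 100, size=(10, 10, 10)).astype(np.float64)
    >>> msk = np.zeros(img.shape, dtype=np.uint8)
    >>> msk[3:7, 3:7, 3:7] = 1
    >>> feats = glcm(img, msk, binWidth=25, distances=[1])
    >>> len(feats)
    24
    >>> sorted(feats)[:3]
    ['Autocorrelation', 'ClusterProminence', 'ClusterShade']
    >>> print(0.0 <= feats['MaximumProbability'] <= 1.0)
    True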
def _glcm_offset(img, mask, offset, levels, symmetric):
    slices_src = []
    slices_dst = []
    for shift in offset:
        if shift > 0:
            slices_src.append(slice(0, -shift))
            slices_dst.append(slice(shift, None))
        elif shift < 0:
            slices_src.append(slice(-shift, None))
            slices_dst.append(slice(0, shift))
        else:
            slices_src.append(slice(None))
            slices_dst.append(slice(None))

    src_vals = img[tuple(slices_src)]
    dst_vals = img[tuple(slices_dst)]

    valid = (src_vals > 0) & (dst_vals > 0)
    rows = src_vals[valid] - 1
    cols = dst_vals[valid] - 1

    P = np.zeros((levels, levels), dtype=np.float64)
    np.add.at(P, (rows, cols), 1)

    if symmetric:
        P += P.T

    return P


def _compute_features_from_matrix(P):
    Ng = P.shape[0]
    eps = 2e-16

    # Grid indices (1-based)
    i, j = np.indices((Ng, Ng)) + 1

    # Marginal probabilities
    px = np.sum(P, axis=1)  # Row sums
    py = np.sum(P, axis=0)  # Column sums

    # --- FIX 1: Correct mean calculation ---
    # Use a 1D index vector for the means: broadcasting the (Ng, Ng) grid 'i' against
    # the (Ng,) marginal px would align px with the column axis, which is the wrong
    # axis for the row marginal and silently produces incorrect sums.
    k_values = np.arange(1, Ng + 1)
    ux = np.sum(k_values * px)
    uy = np.sum(k_values * py)

    # Variances, computed with the same 1D index vector
    sigx2 = np.sum(((k_values - ux) ** 2) * px)
    sigy2 = np.sum(((k_values - uy) ** 2) * py)
    sigx = np.sqrt(sigx2)
    sigy = np.sqrt(sigy2)

    # The remaining features use the 2D grids 'i' and 'j', where broadcasting is correct.
    autocorr = np.sum(i * j * P)
    joint_avg = ux

    cluster_prom = np.sum(((i + j - ux - uy) ** 4) * P)
    cluster_shade = np.sum(((i + j - ux - uy) ** 3) * P)
    cluster_tend = np.sum(((i + j - ux - uy) ** 2) * P)

    contrast = np.sum(((i - j) ** 2) * P)

    if sigx * sigy == 0:
        correlation = 1.0
    else:
        correlation = np.sum((i - ux) * (j - uy) * P) / (sigx * sigy)

    # Difference distribution p_(x-y)(k), k = |i - j|
    k_vals_diff = np.abs(i - j)
    px_minus_y = np.bincount(k_vals_diff.ravel(), weights=P.ravel())
    k_indices = np.arange(len(px_minus_y))

    diff_avg = np.sum(k_indices * px_minus_y)
    diff_ent = -np.sum(px_minus_y * np.log2(px_minus_y + eps))
    diff_var = np.sum(((k_indices - diff_avg) ** 2) * px_minus_y)

    joint_energy = np.sum(P ** 2)
    joint_ent = -np.sum(P * np.log2(P + eps))

    i_minus_j = np.abs(i - j)
    i_minus_j_sq = i_minus_j ** 2

    Id = np.sum(P / (1 + i_minus_j))
    Idm = np.sum(P / (1 + i_minus_j_sq))

    # Safe marginals (zeros replaced by 1), used for MCC and reused for the IMC features below
    px_safe = px.copy()
    px_safe[px == 0] = 1
    py_safe = py.copy()
    py_safe[py == 0] = 1

    # MCC (Maximal Correlation Coefficient)
    try:
        # Q(a, b) = sum_k P(a, k) * P(b, k) / (px(a) * py(k))
        Q = (P / px_safe[:, None]) @ (P / py_safe[None, :]).T
        eigenvalues = np.linalg.eigvals(Q)
        # Sort by magnitude; slight complex noise can appear numerically.
        eigenvalues = np.sort(np.abs(eigenvalues))
        # The largest eigenvalue is approximately 1.0 (the trivial solution);
        # MCC is the square root of the second largest.
        if len(eigenvalues) >= 2:
            second_largest = eigenvalues[-2]
            mcc = np.sqrt(second_largest) if second_largest >= 0 else 0.0
        else:
            mcc = 0.0
    except Exception:
        mcc = 0.0

    Idmn = np.sum(P / (1 + (i_minus_j_sq / (Ng ** 2))))
    Idn = np.sum(P / (1 + (i_minus_j / Ng)))

    mask_neq = (i != j)
    inv_var = np.sum(P[mask_neq] / i_minus_j_sq[mask_neq]) if np.any(mask_neq) else 0.0

    max_prob = np.max(P)

    # Sum distribution p_(x+y)(k), k = i + j.
    k_vals_sum = i + j
    # The minimum sum is 2 (1 + 1), so bincount index 0 corresponds to sum = 2.
    px_plus_y = np.bincount(k_vals_sum.ravel() - 2, weights=P.ravel())
    k_indices_sum = np.arange(len(px_plus_y)) + 2

    sum_avg = np.sum(k_indices_sum * px_plus_y)
    sum_ent = -np.sum(px_plus_y * np.log2(px_plus_y + eps))

    # Sum of Squares (variance relative to the mean), again using the 1D index vector
    sum_squares = np.sum(((k_values - ux) ** 2) * px)

    hx = -np.sum(px * np.log2(px + eps))
    hy = -np.sum(py * np.log2(py + eps))
    hxy = joint_ent
    p_log_px_py = np.log2(px_safe[i - 1] * py_safe[j - 1] + eps)
    hxy1 = -np.sum(P * p_log_px_py)
    px_py = np.outer(px, py)
    hxy2 = -np.sum(px_py * np.log2(px_py + eps))

    imc1 = (hxy - hxy1) / max(hx, hy) if max(hx, hy) != 0 else 0
    imc2_term = np.exp(-2 * (hxy2 - hxy))
    imc2 = np.sqrt(1 - imc2_term) if imc2_term <= 1 else 0

    return {
        'Autocorrelation': autocorr,
        'ClusterProminence': cluster_prom,
        'ClusterShade': cluster_shade,
        'ClusterTendency': cluster_tend,
        'Contrast': contrast,
        'Correlation': correlation,
        'DifferenceAverage': diff_avg,
        'DifferenceEntropy': diff_ent,
        'DifferenceVariance': diff_var,
        'Id': Id,
        'Idm': Idm,
        'Idmn': Idmn,
        'Idn': Idn,
        'Imc1': imc1,
        'Imc2': imc2,
        'InverseVariance': inv_var,
        'JointAverage': joint_avg,
        'JointEnergy': joint_energy,
        'JointEntropy': joint_ent,
        'MCC': mcc,
        'MaximumProbability': max_prob,
        'SumAverage': sum_avg,
        'SumEntropy': sum_ent,
        'SumSquares': sum_squares
    }
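As a second illustrative sketch (again not part of the module), the private helper
_compute_features_from_matrix can be exercised directly on a hand-built 2x2 diagonal matrix,
for which Contrast, Correlation and JointEnergy are easy to verify by hand: only diagonal
pairs occur, so Contrast is 0, Correlation is 1, and JointEnergy is 2 * 0.25 = 0.5.

    >>> import numpy as np
    >>> from numpyradiomics.mod_glcm import _compute_features_from_matrix
    >>> P = np.eye(2) / 2.0   # two gray levels, perfectly correlated pairs
    >>> f = _compute_features_from_matrix(P)
    >>> print(f"{f['Contrast']:.4f} {f['Correlation']:.4f} {f['JointEnergy']:.4f}")
    0.0000 1.0000 0.5000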
def glcm_units(intensity_unit='HU'):
    """
    Returns units for GLCM features.

    Args:
        intensity_unit (str, optional): The unit of pixel intensity (e.g., 'HU', 'GV', '').
            Default is 'HU'.

    Returns:
        dict: Dictionary mapping feature names to their units.

    Example:
        >>> from numpyradiomics import glcm_units
        >>> units = glcm_units(intensity_unit='HU')
        >>> print(units['Contrast'])
        HU^2
    """
    base_unit = intensity_unit
    return {
        "Autocorrelation": f"{base_unit}^2",     # sum(i * j * P) -> I * I * 1
        "ClusterProminence": f"{base_unit}^4",   # sum((i+j-u)^4 * P) -> I^4
        "ClusterShade": f"{base_unit}^3",        # sum((i+j-u)^3 * P) -> I^3
        "ClusterTendency": f"{base_unit}^2",     # sum((i+j-u)^2 * P) -> I^2
        "Contrast": f"{base_unit}^2",            # sum((i-j)^2 * P) -> I^2
        "Correlation": "",                       # covariance / (std * std) -> I^2 / I^2 = 1
        "DifferenceAverage": base_unit,          # sum(k * P_diff) -> I * 1
        "DifferenceEntropy": "",                 # entropy (bits)
        "DifferenceVariance": f"{base_unit}^2",  # variance of difference -> I^2
        "Id": "",                                # inverse difference (1 / (1 + |i-j|)) -> dimensionless
        "Idm": "",                               # inverse difference moment (1 / (1 + (i-j)^2)) -> dimensionless
        "Idmn": "",                              # normalized -> dimensionless
        "Idn": "",                               # normalized -> dimensionless
        "Imc1": "",                              # correlation measure -> dimensionless
        "Imc2": "",                              # correlation measure -> dimensionless
        "InverseVariance": f"{base_unit}^-2",    # sum(P / |i-j|^2) -> 1 / I^2
        "JointAverage": base_unit,               # mean intensity -> I
        "JointEnergy": "",                       # sum(P^2) -> dimensionless
        "JointEntropy": "",                      # entropy (bits)
        "MCC": "",                               # correlation coefficient -> dimensionless
        "MaximumProbability": "",                # probability value -> dimensionless
        "SumAverage": base_unit,                 # sum(k * P_sum) -> I * 1
        "SumEntropy": "",                        # entropy (bits)
        "SumSquares": f"{base_unit}^2",          # variance from mean -> I^2
    }
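A brief illustrative check, assuming the same module path as above: the unit dictionary has
the same 24 keys as the feature dictionary returned by glcm(), which is convenient when
exporting labelled feature tables.

    >>> from numpyradiomics.mod_glcm import glcm_units
    >>> units = glcm_units(intensity_unit='HU')
    >>> len(units)
    24
    >>> units['SumAverage'], units['InverseVariance']
    ('HU', 'HU^-2')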