Tutorial 2: Spatial Outlier Detection (spLOF)

This notebook demonstrates the workflow for using the GLAND model to calculate Spatial Local Outlier Factor (spLOF) scores.

Environment Configuration

[20]:
#First, we import the necessary libraries and configure the computation device. We also set the R_HOME environment variable to ensure mclust (required for clustering) functions correctly within the Python environment.
import os
import torch
import pandas as pd
import scanpy as sc
from sklearn import metrics
from sklearn.metrics import normalized_mutual_info_score
from GLAND import GLAND
from GLAND.utils import clustering

# Set computation device (GPU is highly recommended for GLAND training)
device = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Configure R environment for mclust algorithm
# Path mapped to your conda environment
os.environ['R_HOME'] =r'your R'
Using device: cuda:3

Data Loading and Preprocessing

[21]:
#We will use the Liver(JBO2) dataset as an example. This step involves loading the Visium data and ensuring the gene names are unique.
# Configuration
dataset = 'JBO2_removed_20241224100803.h5ad'
n_clusters = 4  # Number of layers/clusters expected
file_fold = r'/mnt/first19T/liufk/x/'  # Path to your data directory

# Load Visium data
adata = sc.read_h5ad("/mnt/first19T/liufk/x/liver/"+dataset)
adata.var_names_make_unique()
print(f"Successfully loaded dataset {dataset}")
adata
Successfully loaded dataset JBO2_removed_20241224100803.h5ad
/mnt/first19T/liufk/anaconda3/envs/GLAND/lib/python3.8/site-packages/anndata/_core/anndata.py:1840: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
  utils.warn_names_duplicates("var")
[21]:
AnnData object with n_obs × n_vars = 1363 × 31053
    obs: 'in_tissue', 'array_row', 'array_col', 'UMAP_1', 'UMAP_2', 'spot', 'sample', 'type', 'cluster', 'zonation', 'zonationGroup', 'annotation', 'ground_truth'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial'
    obsm: 'spatial'

spLOF Calculate

[22]:
# Run spLOF  Module
# Initialize and execute the GLAND model
model = GLAND.GLAND(
    adata,
    dataset=dataset,
    n_clusters=n_clusters, lofk=13, spLOF_threshold=3
)
adata=model.adata
Total number of points before filtering: 1363

Number of points per label before filtering:
Label Central: 153 points
Label Mid: 587 points
Label Periportal: 596 points
Label Portal: 27 points

Average LOF value per label:
Label Central: Average LOF = 1.0950
Label Mid: Average LOF = 0.9775
Label Periportal: Average LOF = 1.0479
Label Portal: Average LOF = 2.6107

Filtering top 3% by percentage, calculated threshold is: 1.6023

Total number of points after filtering: 1322
Number of points filtered out: 41

Number of points per label after filtering:
Label Central: 150 points
Label Mid: 585 points
Label Periportal: 583 points
Label Portal: 4 points
../_images/tutorial_2_GLAND-Tutorial2_6_1.png
/mnt/first19T/liufk/GLAND/preprocess.py:115: RuntimeWarning: divide by zero encountered in power
  d_inv_sqrt = np.power(rowsum, -0.5).flatten()
[23]:
# The spLOF scores and outlier labels are stored in adata.obs
adata
[23]:
AnnData object with n_obs × n_vars = 1363 × 31053
    obs: 'in_tissue', 'array_row', 'array_col', 'UMAP_1', 'UMAP_2', 'spot', 'sample', 'type', 'cluster', 'zonation', 'zonationGroup', 'annotation', 'ground_truth', 'lof_score', 'lof_outlier'
    var: 'gene_ids', 'feature_types', 'genome', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std'
    uns: 'spatial', 'hvg', 'log1p'
    obsm: 'spatial', 'X_pca', 'distance_matrix', 'graph_neigh', 'adj', 'label_CSL', 'feat', 'feat_a'

spLOF Analyze

[24]:
# Extract spLOF scores to visualize distribution across domains with statistical significance testing
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from statannotations.Annotator import Annotator

ground_truth_labels = model.adata.obs['ground_truth']
if 'lof_score' in model.adata.obs.columns:
    lof_values = model.adata.obs['lof_score']

df_plot = pd.DataFrame({
    'Domain': ground_truth_labels,
    'LOF': lof_values
})

highlight_label = 'Portal'

order = df_plot.groupby("Domain")["LOF"].median().sort_values(ascending=False).index

sns.set_theme(style="ticks", context="paper")
fig, ax = plt.subplots(figsize=(6, 4.5))

sns.violinplot(
    data=df_plot, x='Domain', y='LOF', order=order,
    palette="Set3", inner=None, cut=0, saturation=1.0, bw_adjust=1.0,
    linewidth=1.0, edgecolor='black', ax=ax
)

sns.boxplot(
    data=df_plot, x='Domain', y='LOF', order=order,
    width=0.15, boxprops={'facecolor': 'white', 'edgecolor': 'black', 'linewidth': 1.0},
    medianprops={'color': 'black', 'linewidth': 1.0},
    whiskerprops={'color': 'black', 'linewidth': 1.0},
    capprops={'color': 'black', 'linewidth': 1.0},
    showcaps=False, flierprops={'marker': ''}, ax=ax
)

if highlight_label in order:
    box_pairs = [(highlight_label, other) for other in order if other != highlight_label]

    if len(box_pairs) > 0:
        annotator = Annotator(ax, box_pairs, data=df_plot, x='Domain', y='LOF', order=order)
        annotator.configure(
            test='Mann-Whitney',
            text_format='star',
            loc='inside',
            verbose=0,
            line_offset=0.05,
            line_offset_to_group=0.05
        )
        annotator.apply_and_annotate()

ax.set_ylabel('spLOF Score', fontsize=14, color='black')
ax.set_xlabel('Domains', fontsize=14, color='black')
ax.tick_params(axis='both', which='major', labelsize=12, direction='out', colors='black')
sns.despine()

plt.tight_layout()
plt.show()
/tmp/ipykernel_604184/489373285.py:23: FutureWarning:

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.violinplot(
../_images/tutorial_2_GLAND-Tutorial2_9_1.png
[25]:
#This step visualizes the quality control results by highlighting spatial outliers detected via spLOF and identifying non-continuous spots within specific tissue structures
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import scanpy as sc

# Prepare data (Grouping)
adata.obs['plot_group'] = adata.obs['ground_truth'].astype(str).where(lambda x: x == 'Portal', 'Other').astype('category')

# Define palettes
palettes = {
    "lof_outlier": {"Filtered": mcolors.to_rgba("#D77071", 1.0), "Retained": mcolors.to_rgba("green", 0.3)},
    "plot_group": {"Portal": mcolors.to_rgba("#D77071", 1.0), "Other": mcolors.to_rgba("lightgray", 1.0)}
}

# Plotting
titles = ["spLOF_Outlier", "Non-continuous Spots"]
with plt.rc_context({'font.size': 16, 'axes.titlesize': 16, 'legend.fontsize': 16}):
    fig, axs = plt.subplots(1, 2, figsize=(14, 6))
    for i, col in enumerate(["lof_outlier", "plot_group"]):
        sc.pl.spatial(adata, color=col, title=titles[i], spot_size=35, palette=palettes[col],
                      frameon=False, show=False, ax=axs[i], img_key=None)
    plt.tight_layout()
    plt.show()
../_images/tutorial_2_GLAND-Tutorial2_10_0.png
[ ]: