Model database fetching and inference

Contents

Model database fetching and inference#

Here, we’ll show how to retrieve different types of models from CREsted’s model repository: peak regression models (DeepBICCN and DeepMouseBrain3) and track models (Borzoi Prime).

Load model & set up genome#

from pathlib import Path
import numpy as np
import keras
import crested
import matplotlib.pyplot as plt

# Set the genome
genome = crested.Genome("mm10/genome.fa", "mm10/genome.chrom.sizes")
crested.register_genome(genome)

2026-02-16T14:48:20.230621+0100 INFO Genome genome registered.

model_path, output_names_biccn = crested.get_model("DeepBICCN")
model_biccn = keras.models.load_model(model_path, compile=False)

Region predictions and contribution scores#

chrom, start, end = "chr3", 72535878, 72536378
midpoint = (start+end)//2
start_resized, end_resized = midpoint - 2114//2, midpoint + 2114//2
sequence = genome.fetch(chrom, start_resized, end_resized)

prediction = crested.tl.predict(sequence, model_biccn)

2026-02-16T14:48:25.812594+0100 INFO Lazily importing module crested.tl. This could take a second...
1/1 ━━━━━━━━━━━━━━━━━━━━ 11s 11s/step

%matplotlib inline
crested.pl.region.bar(prediction, classes=output_names_biccn, xtick_rotation=90)

2026-02-16T14:49:39.462694+0100 INFO Lazily importing module crested.pl. This could take a second...

../_images/382dedecc575ae1901bec190b24afb507ba1614673622701284114c1c097c378.png

classes_of_interest = output_names_biccn[np.argmax(prediction)]
class_idx = np.argmax(prediction)

scores, one_hot_encoded_sequences = crested.tl.contribution_scores(
    sequence,
    target_idx=class_idx,
    model=model_biccn,
    batch_size=32,
)

2026-02-16T14:49:45.921519+0100 INFO Calculating contribution scores for 1 class(es) and 1 region(s).

%matplotlib inline
crested.pl.explain.contribution_scores(
    scores,
    one_hot_encoded_sequences,
    class_labels=classes_of_interest,
    zoom_n_bases=500,
    title="Example region",
)  # zoom in on the center 500bp

../_images/30f29d660329cbec3eaab8c149cff82ac7f1b2c84ab1b3f6de1d097aced57891.png

Same region with another model#

model_path, output_names_dmb3 = crested.get_model("DeepMouseBrain3")
model_dmb3 = keras.models.load_model(model_path, compile=False)

prediction = crested.tl.predict(sequence, model_dmb3)

1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 1s/step

fig, ax = crested.pl.region.bar(prediction, classes=output_names_dmb3, xtick_rotation=90, width=45, height=10, show=False)
ax.margins(x=0)

../_images/8d5be8b9b48afb3acec1c8743e41606aec4c38c60e657235e5a7792d59de8e43.png

classes_of_interest = output_names_dmb3[np.argmax(prediction)]
class_idx = np.argmax(prediction)

scores, one_hot_encoded_sequences = crested.tl.contribution_scores(
    sequence,
    target_idx=class_idx,
    model=model_dmb3,
    batch_size=32,
)

2026-02-16T14:50:49.623296+0100 INFO Calculating contribution scores for 1 class(es) and 1 region(s).

%matplotlib inline
crested.pl.explain.contribution_scores(
    scores,
    one_hot_encoded_sequences,
    class_labels=classes_of_interest,
    zoom_n_bases=500,
    title="Example region",
)

../_images/cf96c16bcd98bdd74de63b7818e9c7f7e2b1663288dce1fd64ee05228a6b252a.png

Track predictions#

model_path, output_names_bp = crested.get_model("borzoiprime_mouse_rep0")
model_bp = keras.models.load_model(model_path, compile=False)

start_borzoi, end_borzoi = midpoint - 524288//2, midpoint + 524288//2
start_borzoi_output, end_borzoi_output = midpoint - 196608//2, midpoint + 196608//2
sequence_borzoi = genome.fetch(chrom, start_borzoi, end_borzoi)
prediction = crested.tl.predict(sequence_borzoi, model_bp)

1/1 ━━━━━━━━━━━━━━━━━━━━ 7s 7s/step

%matplotlib inline
class_idx = output_names_bp.index('ATAC:ITL23GL1') # Borzoi Prime Layer 2/3 neuron class

crested.pl.locus.track(
    prediction,
    class_idxs=class_idx,
    coordinates=(chrom, start_borzoi_output, end_borzoi_output),
    class_names=output_names_bp,
    highlight_positions=(start, end),
    show=False
)
plt.show()

../_images/1499190f7b9b564b9b54dde3ed857047dbd233d26f6c1076e3bc2b3f4f317085.png

Gene locus predictions#

With BICCN model#

chrom = "chr4"
start = 91209533
end = 91374781

cell_type = "Sst"
class_idx = output_names_biccn.index(cell_type)

upstream = 50000
downstream = 25000

strand = "-"

scores, coordinates, min_loc, max_loc, tss_position = crested.tl.score_gene_locus(
    chr_name=chrom,
    gene_start=start,
    gene_end=end,
    target_idx=class_idx,
    model=model_biccn,
    strand=strand,
    upstream=upstream,
    downstream=downstream,
    step_size=100,
)

17/19 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step
19/19 ━━━━━━━━━━━━━━━━━━━━ 9s 221ms/step

# Optional
bw_dir = Path("/staging/leuven/stg_00002/lcb/nkemp/mouse/biccn/bigwigs/bws/")
bw_path = bw_dir / f"{cell_type}.bw"

values = (
    crested.utils.read_bigwig_region(bw_path, (chrom, start - upstream, end + downstream))
    if strand == "+"
    else crested.utils.read_bigwig_region(bw_path, (chrom, start - downstream, end + upstream))
)
bw_values = values[0]
midpoints = values[1]

%matplotlib inline
crested.pl.locus.locus_scoring(
    scores,
    (min_loc, max_loc),
    gene_start=start,
    gene_end=end,
    title="CREsted prediction around Elavl2 gene locus for Sst",
    bigwig_values=bw_values,
    bigwig_midpoints=midpoints,
)

2026-02-16T14:54:46.854088+0100 WARNING Argument `title` only applying to the top plot is deprecated since version 2.0.0 to make behavior consistent. To keep a primary title, please use `suptitle='CREsted prediction around Elavl2 gene locus for Sst'` or `title=['CREsted prediction around Elavl2 gene locus for Sst', '']`.

../_images/3d655dcac3ab2d5b4c6d86cad561eb3b6ef18ee044efaa64438726f0b8d9e269.png

With DeepMouseBrain3#

scores, coordinates, min_loc, max_loc, tss_position = crested.tl.score_gene_locus(
    chr_name=chrom,
    gene_start=start,
    gene_end=end,
    target_idx=class_idx,
    model=model_dmb3,
    strand=strand,
    upstream=upstream,
    downstream=downstream,
    step_size=100,
)

18/19 ━━━━━━━━━━━━━━━━━━━━ 0s 69ms/step
19/19 ━━━━━━━━━━━━━━━━━━━━ 20s 484ms/step

%matplotlib inline
crested.pl.locus.locus_scoring(
    scores,
    (min_loc, max_loc),
    gene_start=start,
    gene_end=end,
    title="CREsted prediction around Elavl2 gene locus for Sst",
    bigwig_values=bw_values,
    bigwig_midpoints=midpoints,
)

2026-02-16T14:55:08.018731+0100 WARNING Argument `title` only applying to the top plot is deprecated since version 2.0.0 to make behavior consistent. To keep a primary title, please use `suptitle='CREsted prediction around Elavl2 gene locus for Sst'` or `title=['CREsted prediction around Elavl2 gene locus for Sst', '']`.

../_images/6ffc6dfa1645d284f399484b00bc47a8ce559c4914d1aed1b8d4cda7ced1db68.png