from .download_raw_data import download_raw_data
from .correct_raw_data import correct_raw_data
from .create_precomputed_volume import create_precomputed_volume
from .correct_stitched_data import correct_stitched_data
from .stitching import run_terastitcher
from .util import (
S3Url,
download_terastitcher_files,
tqdm_joblib,
)
from .visualization import create_viz_link
import numpy as np
from glob import glob
from tqdm import tqdm
import argparse
import os
from joblib import Parallel, delayed
import shutil
def colm_pipeline(
    input_s3_path,
    output_s3_path,
    channel_of_interest,
    autofluorescence_channel,
    raw_data_path,
    stitched_data_path,
    log_s3_path=None,
):
    """Run COLM pipeline including vignetting correction, stitching, illumination correction, and upload to S3 in Neuroglancer-compatible format

    The steps run in this order: download the raw tiles for one channel,
    optionally compute stitching alignments, bias correct the raw tiles,
    stitch them, write a precomputed volume to ``output_s3_path``, bias
    correct that volume in place, and print a visualization link.

    Args:
        input_s3_path (str): S3 path to raw COLM data. Should be of the form s3://<bucket>/<experiment>
        output_s3_path (str): S3 path to store precomputed volume. Precomputed volumes for each channel will be stored under this path. Should be of the form s3://<bucket>/<path_to_precomputed>
        channel_of_interest (int): Channel number to operate on. Should be a single integer.
        autofluorescence_channel (int): Autofluorescence channel number. Should be a single integer. Accepted for interface compatibility; it is not referenced in this function's body.
        raw_data_path (str): Local path where corrected raw data will be stored.
        stitched_data_path (str): Local path where stitched slices will be stored.
        log_s3_path (str, optional): S3 path at which pipeline intermediates can be stored including bias correction tile and xml files from Terastitcher. Defaults to None.

    Raises:
        IndexError: If no ``RES*`` entry exists under ``stitched_data_path`` after stitching.
    """
    # get the metadata file paths specific for COLM
    input_s3_url = S3Url(input_s3_path.strip("/"))
    # NOTE(review): output_s3_url is not read below; the construction is kept
    # in case S3Url validates the path -- confirm before removing.
    output_s3_url = S3Url(output_s3_path.strip("/"))

    # download raw data onto local SSD
    vw0_path = f"{input_s3_url.url}/VW0/"
    download_raw_data(vw0_path, channel_of_interest, raw_data_path)

    # compute stitching alignments first if you need to:
    # they are only computed when no stitching files could be downloaded
    # from the log path AND this is channel 0
    if (
        not download_terastitcher_files(log_s3_path, raw_data_path)
        and channel_of_interest == 0
    ):
        # the return value is not needed here; the metadata used below comes
        # from the stitch_only run
        run_terastitcher(
            raw_data_path,
            stitched_data_path,
            input_s3_path,
            log_s3_path=log_s3_path,
            compute_only=True,
        )

    # bias correct all tiles
    # save bias correction tile to log_s3_path
    correct_raw_data(raw_data_path, channel_of_interest, log_s3_path=log_s3_path)

    # now stitch the data with alignments we computed
    metadata = run_terastitcher(
        raw_data_path,
        stitched_data_path,
        input_s3_path,
        log_s3_path=log_s3_path,
        stitch_only=True,
    )

    # downsample and upload stitched data to S3
    # the first RES* match is used; IndexError is raised if there is none
    stitched_path = glob(f"{stitched_data_path}/RES*")[0]
    create_precomputed_volume(
        stitched_path, np.array(metadata["voxel_size"]), output_s3_path
    )

    # correct whole brain bias
    # in order to not replicate data (higher S3 cost)
    # overwrite original precomputed volume with corrected data
    correct_stitched_data(output_s3_path, output_s3_path)

    # print viz link to console
    # visualize data at 5 microns
    viz_link = create_viz_link(
        [output_s3_path], output_resolution=np.array([5] * 3) / 1e6
    )
    print("###################")
    print(f"VIZ LINK: {viz_link}")
    print("###################")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
"Run COLM pipeline including bias correction, stitching, upoad to S3"
)
parser.add_argument(
"input_s3_path",
help="S3 path to input colm data. Should be of the form s3://<bucket>/<experiment>",
type=str,
)
parser.add_argument(
"output_s3_path",
help="S3 path to store precomputed volume. Precomputed volumes for each channel will be stored under this path. Should be of the form s3://<bucket>/<path_to_precomputed>. The data will be saved at s3://<bucket>/<path_to_precomputed>/CHN0<channel>",
type=str,
)
# parser.add_argument('channel_of_interest', help='Channel of interest in experiment', type=int)
parser.add_argument(
"num_channels", help="Number of channels in experiment", type=int
)
parser.add_argument(
"autofluorescence_channel", help="Autofluorescence channel number.", type=int
)
parser.add_argument(
"--raw_data_path",
help="Local path where corrected raw data will be stored.",
type=str,
default=os.path.expanduser("~/ssd1"),
)
parser.add_argument(
"--stitched_data_path",
help="Local path where stitched slices will be stored.",
type=str,
default=os.path.expanduser("~/ssd2"),
)
parser.add_argument(
"--log_s3_path",
help="S3 path at which pipeline intermediates can be stored including bias correctin tile.",
type=str,
default=None,
)
args = parser.parse_args()
# for all channels in experiment
for i in range(args.num_channels):
output_s3_path = args.output_s3_path.strip("/")
colm_pipeline(
args.input_s3_path,
f"{output_s3_path}/CHN0{i}",
i,
args.autofluorescence_channel,
args.raw_data_path,
args.stitched_data_path,
args.log_s3_path,
)
if i < args.num_channels - 1:
# delete all tiff files in raw_data_path
directories_to_remove = glob(f"{args.raw_data_path}/LOC*")
directories_to_remove.extend(glob(f"{args.stitched_data_path}/RES*"))
with tqdm_joblib(
tqdm(
desc=f"Delete files from CHN0{i}", total=len(directories_to_remove)
)
) as progress_bar:
Parallel(-1)(delayed(shutil.rmtree)(f) for f in directories_to_remove)
# make sure to delete mdata.bin from terastitcher
if os.path.exists(f"{args.raw_data_path}/mdata.bin"):
os.remove(f"{args.raw_data_path}/mdata.bin")