#
# @include "_cr_ana_stages.mro"
#

#
# Copyright (c) 2021 10X Genomics, Inc. All rights reserved.
#
# Code generated by cr_ana.  DO NOT EDIT.
#

# File types used by the cr_ana stage declarations. In this file bincode.lz4
# is the type of the RUN_DIFFERENTIAL_EXPRESSION_NG chunk output `diffexp`.
filetype bincode.lz4;
filetype h5;
#
# @include "_sc_rna_analyzer_stages.mro"
#

#
# Copyright (c) 2019 10X Genomics, Inc. All rights reserved.
#

# File types used by the Python analyzer stages. `pickle` and `binary` appear
# in the RUN_FBPCA / CORRECT_CHEMISTRY_BATCH / RUN_GRAPH_CLUSTERING interfaces.
filetype csv;
filetype h5;
filetype html;
filetype json;
filetype pickle;
filetype binary;
#
# @include "_common_cloupe_stages.mro"
#

#
# Copyright (c) 2016 10X Genomics, Inc. All rights reserved.
#

# File types used by CLOUPE_PREPROCESS: `cloupe` is its output type and `txt`
# is the type of its `fiducial_positions_list` input.
filetype cloupe;
filetype csv;
filetype json;
filetype h5;
filetype txt;
#
# @include "_cr_aggr_stages.mro"
#

#
# Copyright (c) 2021 10X Genomics, Inc. All rights reserved.
#
# Code generated by cr_aggr.  DO NOT EDIT.
#

# File types used by the cr_aggr stages and by the VdjAggrResults struct
# (em.json, fa, fasta, pb and vloupe all appear in those declarations).
filetype csv;
filetype em.json;
filetype fa;
filetype fasta;
filetype json;
filetype pb;
filetype vloupe;
#
# @include "_sc_rna_aggregator_stages.mro"
#

#
# Copyright (c) 2017 10X Genomics, Inc. All rights reserved.
#

# File types declared by the aggregator stage include.
# NOTE(review): bam, bam.bai, tsv and fastq are not referenced by any
# declaration in the portion of this file reviewed here; they may be used
# further down or only by the original include.
filetype bam;
filetype bam.bai;
filetype csv;
filetype tsv;
filetype fastq;
filetype json;
filetype h5;
filetype html;
filetype pickle;

#
# @include "_cr_aggr_stages.mro"
#

# Description of a single VDJ library taking part in aggregation.
# Used as the element type of VdjAggrInput.libraries and of the `libraries`
# input of PROCESS_VDJ_PROTO, SETUP_VDJ_AGGR and WRITE_WEB_SUMMARY_JSON.
# NOTE(review): presumably one record per row of the aggregation CSV, with
# `meta` holding the remaining user columns -- confirm against cr_aggr.
struct VdjAggrCsvLibrary(
    string      library_id,
    path        vdj_contig_info,
    string      donor,
    string      origin,
    map<string> meta,
)

# A group of VDJ libraries. PARSE_AGGR_CSV emits an array of these as
# `vdj_aggr_inputs`.
struct VdjAggrInput(
    VdjAggrCsvLibrary[] libraries,
)

# Bundle of VDJ aggregation output files. MATCH_VDJ_AGGR_OUTS returns two of
# these (`vdj_t_results` and `vdj_b_results`); the fields mirror the array
# inputs of that stage, one file per field.
struct VdjAggrResults(
    csv    clonotypes,
    fa     donor_ref_fa,
    fasta  consensus_fasta,
    csv    filtered_contig_annotations_csv,
    csv    consensus_annotations_csv,
    json   web_summary_data,
    vloupe vloupe,
)

#
# @include "_cr_ana_stages.mro"
#

# Compiled (cr_ana) differential-expression stage.
# Its stage-level inputs and outputs match the Python
# RUN_DIFFERENTIAL_EXPRESSION stage; SC_RNA_ANALYZER_NG calls it under the
# alias RUN_DIFFERENTIAL_EXPRESSION.
# Split stage: each chunk receives `cluster_keys` and produces a bincode.lz4
# `diffexp` file. Requests 1 GB of memory and 1 thread by default.
stage RUN_DIFFERENTIAL_EXPRESSION_NG(
    in  h5          matrix_h5,
    in  h5          clustering_h5,
    in  int         random_seed,
    in  int         max_clusters,
    in  bool        is_antibody_only,
    out h5          diffexp_h5,
    out path        diffexp_csv,
    src comp        "cr_ana martian diff_exp_stage",
) split (
    in  map[]       cluster_keys,
    out bincode.lz4 diffexp,
) using (
    mem_gb   = 1,
    threads  = 1,
    volatile = strict,
)

# Compiled (cr_ana) graph-clustering stage.
# Compared with the Python RUN_GRAPH_CLUSTERING stage it adds a `resolution`
# input and has no `chunked_neighbors` output. SC_RNA_ANALYZER_NG calls it
# under the alias RUN_GRAPH_CLUSTERING.
# Declared with a split section that has no chunk-level parameters.
# Requests 1 GB of memory and 1 thread by default.
stage RUN_GRAPH_CLUSTERING_NG(
    in  h5     matrix_h5,
    in  h5     pca_h5,
    in  int    num_neighbors,
    in  float  neighbor_a,
    in  float  neighbor_b,
    in  int    num_bcs,
    in  int    input_pcs,
    in  int    balltree_leaf_size,
    in  string similarity_type,
    in  float  resolution,
    in  int    random_seed,
    out h5     clusters_h5,
    out path   clusters_csv,
    src comp   "cr_ana martian graph_clustering_stage",
) split (
) using (
    mem_gb   = 1,
    threads  = 1,
    volatile = strict,
)

# Compiled (cr_ana) PCA stage with the same inputs and outputs as the Python
# RUN_PCA stage. SC_RNA_ANALYZER_NG calls it under the alias RUN_PCA.
# Declared with a split section that has no chunk-level parameters.
# Requests 1 GB of memory and 1 thread by default.
stage RUN_PCA_NG(
    in  h5   matrix_h5,
    in  int  random_seed,
    in  int  num_bcs,
    in  int  num_genes,
    in  int  num_pcs,
    in  bool is_antibody_only,
    out h5   pca_h5,
    out path pca_csv,
    src comp "cr_ana martian pca_stage",
) split (
) using (
    mem_gb   = 1,
    threads  = 1,
    volatile = strict,
)

# Compiled (cr_ana) t-SNE stage. SC_RNA_ANALYZER_NG calls it under the alias
# RUN_TSNE.
# Note that `perplexity` is declared as float here, whereas the Python
# RUN_TSNE stage declares it as int and SC_RNA_ANALYZER_NG binds it from the
# int pipeline input `tsne_perplexity`.
# Split stage: each chunk receives `tsne_dims` and `feature_type`.
# Requests 1 GB of memory and 1 thread by default.
stage RUN_TSNE_NG(
    in  h5     matrix_h5,
    in  h5     pca_h5,
    in  int    random_seed,
    in  float  perplexity,
    in  int    input_pcs,
    in  int    max_dims,
    in  int    max_iter,
    in  int    stop_lying_iter,
    in  int    mom_switch_iter,
    in  float  theta,
    in  bool   is_antibody_only,
    out h5     tsne_h5,
    out path   tsne_csv,
    src comp   "cr_ana martian tsne_stage",
) split (
    in  int    tsne_dims,
    in  string feature_type,
) using (
    mem_gb   = 1,
    threads  = 1,
    volatile = strict,
)

# Compiled (cr_ana) UMAP stage with the same stage-level and chunk-level
# parameters as the Python RUN_UMAP stage. SC_RNA_ANALYZER_NG calls it under
# the alias RUN_UMAP.
# Split stage: each chunk receives `umap_dims` and `feature_type`.
# Requests 1 GB of memory and 1 thread by default.
stage RUN_UMAP_NG(
    in  h5     matrix_h5,
    in  h5     pca_h5,
    in  int    random_seed,
    in  int    n_neighbors,
    in  int    input_pcs,
    in  int    max_dims,
    in  float  min_dist,
    in  string metric,
    in  bool   is_antibody_only,
    out h5     umap_h5,
    out path   umap_csv,
    src comp   "cr_ana martian umap_stage",
) split (
    in  int    umap_dims,
    in  string feature_type,
) using (
    mem_gb   = 1,
    threads  = 1,
    volatile = strict,
)

#
# @include "_sc_rna_analyzer_stages.mro"
#

# Python stage that receives the full set of secondary-analysis parameters
# and returns boolean flags. SC_RNA_ANALYZER_NG binds its inputs with a
# wildcard from the pipeline inputs and uses the outputs (`skip`,
# `disable_run_pca`, `disable_correct_chemistry_batch`,
# `skip_multigenome_analysis`) as `disabled` conditions on later calls, and
# `is_antibody_only` as an input to several of them.
# `skip` and `skip_multigenome_analysis` exist both as inputs and as outputs.
# NOTE(review): how the output flags are derived from the inputs is decided
# in the Python stage code, which is not part of this file.
stage ANALYZER_PREFLIGHT(
    in  bool   skip,
    in  h5     filtered_matrices_h5,
    in  csv    use_genes,
    in  csv    exclude_genes,
    in  csv    use_bcs,
    in  int    num_analysis_bcs,
    in  int    force_cells,
    in  int    random_seed,
    in  int    num_pca_bcs,
    in  int    num_pca_genes,
    in  int    num_principal_comps,
    in  int    cbc_knn,
    in  float  cbc_alpha,
    in  float  cbc_sigma,
    in  bool   cbc_realign_panorama,
    in  int    max_clusters,
    in  int    graphclust_neighbors,
    in  float  neighbor_a,
    in  float  neighbor_b,
    in  int    tsne_perplexity,
    in  int    tsne_input_pcs,
    in  int    tsne_max_dims,
    in  int    tsne_max_iter,
    in  int    tsne_stop_lying_iter,
    in  int    tsne_mom_switch_iter,
    in  float  tsne_theta,
    in  int    umap_n_neighbors,
    in  int    umap_input_pcs,
    in  int    umap_max_dims,
    in  float  umap_min_dist,
    in  string umap_metric,
    in  bool   chemistry_batch_correction,
    in  bool   skip_multigenome_analysis,
    out bool   skip,
    out bool   is_antibody_only,
    out bool   disable_run_pca,
    out bool   disable_correct_chemistry_batch,
    out bool   skip_multigenome_analysis,
    src py     "../rna/stages/analyzer/analyzer_preflight",
) using (
    volatile = strict,
)

# Python preflight stage for reanalysis. It takes the filtered matrix and
# declares no outputs; SC_RNA_REANALYZER_CS calls it with `preflight = true`.
stage REANALYZER_PREFLIGHT(
    in  h5 filtered_matrices_h5,
    src py "../rna/stages/analyzer/reanalyzer_preflight",
) using (
    volatile = strict,
)

# Python stage that takes a matrix and a list of sample definitions and
# returns a list of sample definitions under the same name.
# In SC_RNA_REANALYZER_CS its input is PARSE_AGGR_CSV.count_libraries and its
# output feeds SETUP_SAMPLES.
# NOTE(review): presumably checks the sample IDs against the matrix, as the
# name suggests -- confirm in the stage code.
stage REANALYZE_VERIFY_SAMPLE_IDS(
    in  h5    matrix_h5,
    in  map[] sample_defs,
    out map[] sample_defs,
    src py    "../rna/stages/analyzer/reanalyze_verify_sample_ids",
) using (
    volatile = strict,
)

# Python stage that turns the input matrix plus gene/barcode selection
# inputs into two matrices and a multi-genome flag.
# In SC_RNA_ANALYZER_NG, `preprocessed_matrix_h5` is the matrix handed to the
# downstream analysis stages, `cloupe_matrix_h5` is returned as a pipeline
# output, and `is_multi_genome` feeds RUN_MULTIGENOME_ANALYSIS and
# SUMMARIZE_ANALYSIS.
# Declared with a split section that has no chunk-level parameters.
stage PREPROCESS_MATRIX(
    in  h5   matrix_h5,
    in  int  random_seed,
    in  csv  use_genes,
    in  csv  exclude_genes,
    in  csv  use_bcs,
    in  int  num_bcs,
    in  int  force_cells,
    in  bool is_antibody_only,
    out h5   cloupe_matrix_h5,
    out h5   preprocessed_matrix_h5,
    out bool is_multi_genome,
    src py   "../rna/stages/analyzer/preprocess_matrix",
) split (
) using (
    volatile = strict,
)

# Python stage producing multi-genome outputs (a CSV path, a JSON path and a
# JSON summary) from the filtered matrix and the `is_multi_genome` flag.
# All three outputs are consumed by SUMMARIZE_ANALYSIS in SC_RNA_ANALYZER_NG.
# Declared with a split section that has no chunk-level parameters.
stage RUN_MULTIGENOME_ANALYSIS(
    in  h5   filtered_matrices_h5,
    in  bool is_multi_genome,
    out path multi_genome_csv,
    out path multi_genome_json,
    out json multi_genome_summary,
    src py   "../rna/stages/analyzer/run_multigenome_analysis",
) split (
) using (
    volatile = strict,
)

# Python PCA stage. Its interface is identical to the compiled RUN_PCA_NG
# stage; SC_RNA_ANALYZER_NG uses RUN_PCA_NG (aliased to RUN_PCA) instead of
# this declaration.
# Declared with a split section that has no chunk-level parameters.
stage RUN_PCA(
    in  h5   matrix_h5,
    in  int  random_seed,
    in  int  num_bcs,
    in  int  num_genes,
    in  int  num_pcs,
    in  bool is_antibody_only,
    out h5   pca_h5,
    out path pca_csv,
    src py   "../rna/stages/analyzer/run_pca",
) split (
) using (
    volatile = strict,
)

# Python stage producing two pickle files, `dimred_matrix` and
# `matrix_barcode_feature_info`, which are the first two inputs of
# CORRECT_CHEMISTRY_BATCH. In SC_RNA_ANALYZER_NG both stages are disabled by
# the same flag (ANALYZER_PREFLIGHT.disable_correct_chemistry_batch).
# Declared with a split section that has no chunk-level parameters.
stage RUN_FBPCA(
    in  h5     matrix_h5,
    in  map[]  library_info,
    in  int    num_pcs,
    in  bool   is_antibody_only,
    out pickle dimred_matrix,
    out pickle matrix_barcode_feature_info,
    src py     "../rna/stages/analyzer/run_fbpca",
) split (
) using (
    volatile = strict,
)

# Python k-means clustering stage.
# Split stage: each chunk receives an `n_clusters` value.
# SC_RNA_ANALYZER_NG passes null for `num_bcs` and `num_pcs`, and forwards
# the outputs to COMBINE_CLUSTERING.
stage RUN_KMEANS(
    in  h5   matrix_h5,
    in  h5   pca_h5,
    in  int  random_seed,
    in  int  max_clusters,
    in  int  num_bcs,
    in  int  num_pcs,
    out h5   kmeans_h5,
    out path kmeans_csv,
    src py   "../rna/stages/analyzer/run_kmeans",
) split (
    in  int  n_clusters,
) using (
    volatile = strict,
)

# Python graph-clustering stage.
# Unlike the compiled RUN_GRAPH_CLUSTERING_NG stage it has no `resolution`
# input and it additionally outputs `chunked_neighbors`. SC_RNA_ANALYZER_NG
# uses RUN_GRAPH_CLUSTERING_NG (aliased to RUN_GRAPH_CLUSTERING) instead of
# this declaration.
# Split stage: each chunk receives a neighbor index, a submatrix, a row range
# (`row_start`, `total_rows`), `k_nearest` and `use_bcs`.
# The parameter help strings for `neighbor_a` / `neighbor_b` had an unbalanced
# parenthesis; it is closed here so the formula reads unambiguously.
stage RUN_GRAPH_CLUSTERING(
    in  h5     matrix_h5,
    in  h5     pca_h5,
    in  int    num_neighbors       "Use this many neighbors",
    in  float  neighbor_a          "Use larger of (a+b*log10(n_cells)) neighbors or num_neighbors",
    in  float  neighbor_b          "Use larger of (a+b*log10(n_cells)) neighbors or num_neighbors",
    in  int    num_bcs             "Use this many cell-barcodes in clustering",
    in  int    input_pcs           "Use top N PCs",
    in  int    balltree_leaf_size,
    in  string similarity_type     "Type of similarity to use (nn or snn)",
    in  int    random_seed         "Seed for random number generator",
    out h5     chunked_neighbors,
    out h5     clusters_h5,
    out path   clusters_csv,
    src py     "../rna/stages/analyzer/run_graph_clustering",
) split (
    in  pickle neighbor_index,
    in  h5     submatrix,
    in  int    row_start,
    in  int    total_rows,
    in  int    k_nearest,
    in  h5     use_bcs,
) using (
    volatile = strict,
)

# Python stage that takes a matrix, a PCA file and a clusters file and
# returns a clusters file (same parameter name, `clusters_h5`) plus a CSV
# directory.
# NOTE(review): not called by any pipeline in the portion of this file
# reviewed here.
# Declared with a split section that has no chunk-level parameters.
stage MERGE_CLUSTERS(
    in  h5   matrix_h5,
    in  h5   pca_h5,
    in  h5   clusters_h5,
    out h5   clusters_h5,
    out path clusters_csv,
    src py   "../rna/stages/analyzer/merge_clusters",
) split (
) using (
    volatile = strict,
)

# Python stage that takes the k-means and graph-clustering results (h5 + csv
# path each) and emits a single `clustering_h5` / `clustering_csv` pair.
# In SC_RNA_ANALYZER_NG the combined output feeds differential expression and
# SUMMARIZE_ANALYSIS.
stage COMBINE_CLUSTERING(
    in  h5   kmeans_h5,
    in  path kmeans_csv,
    in  h5   graphclust_h5,
    in  path graphclust_csv,
    out h5   clustering_h5,
    out path clustering_csv,
    src py   "../rna/stages/analyzer/combine_clustering",
) using (
    volatile = strict,
)

# Python differential-expression stage. Stage-level inputs and outputs match
# the compiled RUN_DIFFERENTIAL_EXPRESSION_NG stage; the chunk interface
# differs (a single `clustering_key` string per chunk, no chunk output).
# SC_RNA_ANALYZER_NG uses the _NG variant instead of this declaration.
stage RUN_DIFFERENTIAL_EXPRESSION(
    in  h5     matrix_h5,
    in  h5     clustering_h5,
    in  int    random_seed,
    in  int    max_clusters,
    in  bool   is_antibody_only,
    out h5     diffexp_h5,
    out path   diffexp_csv,
    src py     "../rna/stages/analyzer/run_differential_expression",
) split (
    in  string clustering_key,
) using (
    volatile = strict,
)

# Python t-SNE stage. `perplexity` is an int here, while the compiled
# RUN_TSNE_NG stage declares it as float; otherwise the two interfaces match.
# SC_RNA_ANALYZER_NG uses the _NG variant instead of this declaration.
# Split stage: each chunk receives `tsne_dims` and `feature_type`.
stage RUN_TSNE(
    in  h5     matrix_h5,
    in  h5     pca_h5,
    in  int    random_seed,
    in  int    perplexity,
    in  int    input_pcs,
    in  int    max_dims,
    in  int    max_iter,
    in  int    stop_lying_iter,
    in  int    mom_switch_iter,
    in  float  theta,
    in  bool   is_antibody_only,
    out h5     tsne_h5,
    out path   tsne_csv,
    src py     "../rna/stages/analyzer/run_tsne",
) split (
    in  int    tsne_dims,
    in  string feature_type,
) using (
    volatile = strict,
)

# Python UMAP stage with the same stage-level and chunk-level parameters as
# the compiled RUN_UMAP_NG stage. SC_RNA_ANALYZER_NG uses the _NG variant
# instead of this declaration.
# Split stage: each chunk receives `umap_dims` and `feature_type`.
stage RUN_UMAP(
    in  h5     matrix_h5,
    in  h5     pca_h5,
    in  int    random_seed,
    in  int    n_neighbors,
    in  int    input_pcs,
    in  int    max_dims,
    in  float  min_dist,
    in  string metric,
    in  bool   is_antibody_only,
    out h5     umap_h5,
    out path   umap_csv,
    src py     "../rna/stages/analyzer/run_umap",
) split (
    in  int    umap_dims,
    in  string feature_type,
) using (
    volatile = strict,
)

# Python stage that gathers the h5 and CSV outputs of the individual
# analysis steps (PCA, clustering, differential expression, t-SNE, UMAP,
# multi-genome) together with the batch-correction scores, and emits the
# `analysis` and `analysis_csv` directories plus a JSON `summary`.
# These three outputs are the ones SC_RNA_ANALYZER_NG returns (along with
# PREPROCESS_MATRIX.cloupe_matrix_h5).
# Declared with a split section that has no chunk-level parameters.
stage SUMMARIZE_ANALYSIS(
    in  h5    matrix_h5,
    in  h5    pca_h5,
    in  h5    clustering_h5,
    in  h5    diffexp_h5,
    in  h5    tsne_h5,
    in  h5    umap_h5,
    in  path  pca_csv,
    in  path  clustering_csv,
    in  path  diffexp_csv,
    in  path  tsne_csv,
    in  path  umap_csv,
    in  json  multi_genome_summary,
    in  path  multi_genome_csv,
    in  path  multi_genome_json,
    in  bool  is_multi_genome,
    in  bool  chemistry_batch_correction,
    in  float batch_score_before_correction,
    in  float batch_score_after_correction,
    out path  analysis,
    out path  analysis_csv,
    out json  summary,
    src py    "../rna/stages/analyzer/summarize_analysis",
) split (
) using (
    volatile = strict,
)

# Python stage that reads the user parameter CSV and exposes each analysis
# parameter as a typed output, plus a `params_csv` output of the same name
# as the input. SC_RNA_REANALYZER_CS feeds these outputs into the
# corresponding SC_RNA_ANALYZER_NG inputs.
# Note: there is no output for `graphclust_resolution`; the reanalyzer passes
# null for that analyzer input.
# The stage code lives in analyzer/parse_csv, distinct from the aggregator's
# PARSE_CSV stage (aggregator/parse_csv).
stage PARSE_PARAM_CSV(
    in  csv    params_csv,
    out csv    params_csv,
    out int    num_analysis_bcs,
    out int    random_seed,
    out int    num_pca_bcs,
    out int    num_pca_genes,
    out int    num_principal_comps,
    out int    cbc_knn,
    out float  cbc_alpha,
    out float  cbc_sigma,
    out bool   cbc_realign_panorama,
    out int    max_clusters,
    out int    graphclust_neighbors,
    out float  neighbor_a,
    out float  neighbor_b,
    out int    tsne_perplexity,
    out int    tsne_input_pcs,
    out int    tsne_max_dims,
    out int    tsne_max_iter,
    out int    tsne_stop_lying_iter,
    out int    tsne_mom_switch_iter,
    out float  tsne_theta,
    out int    umap_n_neighbors,
    out int    umap_input_pcs,
    out int    umap_max_dims,
    out float  umap_min_dist,
    out string umap_metric,
    src py     "../rna/stages/analyzer/parse_csv",
) using (
    volatile = strict,
)

# Python stage that builds the reanalysis web summary (HTML), a JSON summary
# and a `feature_bc_matrix_mex` directory from the filtered matrix, the
# analysis directory and the analyzer summary.
# Declared with a split section that has no chunk-level parameters.
# The `retain` clause lists `summary`, exempting that output from volatile
# cleanup even though the stage is `volatile = strict`.
stage SUMMARIZE_REANALYSIS(
    in  string sample_id,
    in  string sample_desc,
    in  h5     filtered_matrices,
    in  path   analysis,
    in  json   analyze_matrices_summary,
    out html   web_summary,
    out json   summary,
    out path   feature_bc_matrix_mex,
    src py     "../rna/stages/analyzer/summarize_reanalysis",
) split (
) using (
    volatile = strict,
) retain (
    summary,
)

# Python chemistry batch-correction stage.
# Inputs are the two pickles produced by RUN_FBPCA, the library info and the
# `cbc_*` parameters. Outputs are the batch scores before and after
# correction and an aligned PCA (h5 + csv path), which SC_RNA_ANALYZER_NG
# offers to CHOOSE_DIMENSION_REDUCTION_OUTPUT alongside the plain PCA.
# Split stage: each chunk receives a `batch_id` with supporting index data
# and writes a `batch_nearest_neighbor` binary file.
# Requests 4 GB of memory by default.
stage CORRECT_CHEMISTRY_BATCH(
    in  pickle dimred_matrix,
    in  pickle matrix_barcode_feature_info,
    in  map[]  library_info,
    in  int    cbc_knn,
    in  float  cbc_alpha,
    in  float  cbc_sigma,
    in  bool   cbc_realign_panorama,
    out float  batch_score_before_correction,
    out float  batch_score_after_correction,
    out h5     aligned_pca_h5,
    out path   aligned_pca_csv,
    src py     "../rna/stages/analyzer/correct_chemistry_batch",
) split (
    in  int    batch_id,
    in  map    batch_to_bc_indices,
    in  pickle ordered_dimred_matrix,
    in  pickle idx_to_batch_id,
    in  bool   need_reorder_barcode,
    in  pickle barcode_reorder_index,
    out binary batch_nearest_neighbor,
) using (
    mem_gb   = 4,
    volatile = strict,
)

# Python stage that takes lists of candidate PCA h5 files and CSV directories
# and returns a single `pca_h5` / `pca_csv` pair.
# SC_RNA_ANALYZER_NG passes [RUN_PCA, CORRECT_CHEMISTRY_BATCH] outputs, in
# that order.
# NOTE(review): presumably picks the candidate whose producing stage was not
# disabled -- the selection rule lives in the stage code; confirm there.
stage CHOOSE_DIMENSION_REDUCTION_OUTPUT(
    in  h5[]   pca_h5_list,
    in  path[] pca_csv_list,
    out h5     pca_h5,
    out path   pca_csv,
    src py     "../rna/stages/analyzer/choose_dimension_reduction_output",
) using (
    volatile = strict,
)

#
# @include "sc_rna_analyzer_ng.mro"
#

# Secondary-analysis pipeline built on the compiled (cr_ana) stages.
#
# Flow: preflight -> matrix preprocessing -> (multi-genome analysis) ->
# PCA or FBPCA + chemistry batch correction -> choice of PCA output ->
# k-means and graph clustering -> combined clustering -> differential
# expression, t-SNE and UMAP -> summary.
#
# The compiled *_NG stages are called under the names of their Python
# counterparts (RUN_PCA, RUN_GRAPH_CLUSTERING, RUN_DIFFERENTIAL_EXPRESSION,
# RUN_TSNE, RUN_UMAP), so downstream references use the un-suffixed names.
#
# Returns the `analysis` / `analysis_csv` directories and `summary` from
# SUMMARIZE_ANALYSIS, plus `cloupe_matrix_h5` from PREPROCESS_MATRIX.
pipeline SC_RNA_ANALYZER_NG(
    in  h5     filtered_matrices_h5,
    in  map[]  aggr_library_info,
    in  bool   no_secondary_analysis,
    in  csv    use_genes,
    in  csv    exclude_genes,
    in  csv    use_bcs,
    in  int    num_analysis_bcs,
    in  int    random_seed,
    in  int    num_pca_bcs,
    in  int    num_pca_genes,
    in  int    num_principal_comps,
    in  bool   chemistry_batch_correction,
    in  int    cbc_knn,
    in  float  cbc_alpha,
    in  float  cbc_sigma,
    in  bool   cbc_realign_panorama,
    in  int    max_clusters,
    in  int    graphclust_neighbors,
    in  float  neighbor_a,
    in  float  neighbor_b,
    in  float  graphclust_resolution,
    in  int    tsne_perplexity,
    in  int    tsne_input_pcs,
    in  int    tsne_max_dims,
    in  int    tsne_max_iter,
    in  int    tsne_stop_lying_iter,
    in  int    tsne_mom_switch_iter,
    in  float  tsne_theta,
    in  int    umap_n_neighbors,
    in  int    umap_input_pcs,
    in  int    umap_max_dims,
    in  float  umap_min_dist,
    in  string umap_metric,
    in  int    force_cells,
    in  bool   skip_multigenome_analysis,
    out path   analysis,
    out path   analysis_csv,
    out h5     cloupe_matrix_h5,
    out json   summary,
)
{
    # `skip` is bound explicitly from no_secondary_analysis; every other
    # preflight input is bound by name from the pipeline inputs via `* = self`.
    call ANALYZER_PREFLIGHT(
        skip = self.no_secondary_analysis,
        *    = self,
    ) using (
        volatile = true,
    )

    call PREPROCESS_MATRIX(
        matrix_h5        = self.filtered_matrices_h5,
        random_seed      = self.random_seed,
        use_genes        = self.use_genes,
        exclude_genes    = self.exclude_genes,
        use_bcs          = self.use_bcs,
        num_bcs          = self.num_analysis_bcs,
        force_cells      = self.force_cells,
        is_antibody_only = ANALYZER_PREFLIGHT.is_antibody_only,
    ) using (
        disabled = ANALYZER_PREFLIGHT.skip,
        volatile = true,
    )

    # Gated by its own flag (skip_multigenome_analysis) rather than by `skip`.
    call RUN_MULTIGENOME_ANALYSIS(
        filtered_matrices_h5 = self.filtered_matrices_h5,
        is_multi_genome      = PREPROCESS_MATRIX.is_multi_genome,
    ) using (
        disabled = ANALYZER_PREFLIGHT.skip_multigenome_analysis,
        volatile = true,
    )

    # Plain PCA path, gated by disable_run_pca.
    call RUN_PCA_NG as RUN_PCA(
        matrix_h5        = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        random_seed      = self.random_seed,
        num_bcs          = self.num_pca_bcs,
        num_genes        = self.num_pca_genes,
        num_pcs          = self.num_principal_comps,
        is_antibody_only = ANALYZER_PREFLIGHT.is_antibody_only,
    ) using (
        disabled = ANALYZER_PREFLIGHT.disable_run_pca,
        volatile = true,
    )

    # Batch-correction path: RUN_FBPCA and CORRECT_CHEMISTRY_BATCH share the
    # disable_correct_chemistry_batch gate.
    call RUN_FBPCA(
        matrix_h5        = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        library_info     = self.aggr_library_info,
        num_pcs          = self.num_principal_comps,
        is_antibody_only = ANALYZER_PREFLIGHT.is_antibody_only,
    ) using (
        disabled = ANALYZER_PREFLIGHT.disable_correct_chemistry_batch,
        volatile = true,
    )

    call CORRECT_CHEMISTRY_BATCH(
        dimred_matrix               = RUN_FBPCA.dimred_matrix,
        matrix_barcode_feature_info = RUN_FBPCA.matrix_barcode_feature_info,
        library_info                = self.aggr_library_info,
        cbc_knn                     = self.cbc_knn,
        cbc_alpha                   = self.cbc_alpha,
        cbc_sigma                   = self.cbc_sigma,
        cbc_realign_panorama        = self.cbc_realign_panorama,
    ) using (
        disabled = ANALYZER_PREFLIGHT.disable_correct_chemistry_batch,
        volatile = true,
    )

    # Reduces the two candidate PCA results to the single pca_h5 / pca_csv
    # pair that all later stages read.
    call CHOOSE_DIMENSION_REDUCTION_OUTPUT(
        pca_h5_list  = [
            RUN_PCA.pca_h5,
            CORRECT_CHEMISTRY_BATCH.aligned_pca_h5,
        ],
        pca_csv_list = [
            RUN_PCA.pca_csv,
            CORRECT_CHEMISTRY_BATCH.aligned_pca_csv,
        ],
    ) using (
        disabled = ANALYZER_PREFLIGHT.skip,
    )

    # num_bcs and num_pcs are passed as null here.
    call RUN_KMEANS(
        matrix_h5    = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        pca_h5       = CHOOSE_DIMENSION_REDUCTION_OUTPUT.pca_h5,
        random_seed  = self.random_seed,
        max_clusters = self.max_clusters,
        num_bcs      = null,
        num_pcs      = null,
    ) using (
        disabled = ANALYZER_PREFLIGHT.skip,
        volatile = true,
    )

    # similarity_type is fixed to "nn"; input_pcs, num_bcs and
    # balltree_leaf_size are passed as null.
    call RUN_GRAPH_CLUSTERING_NG as RUN_GRAPH_CLUSTERING(
        matrix_h5          = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        pca_h5             = CHOOSE_DIMENSION_REDUCTION_OUTPUT.pca_h5,
        num_neighbors      = self.graphclust_neighbors,
        neighbor_a         = self.neighbor_a,
        neighbor_b         = self.neighbor_b,
        input_pcs          = null,
        num_bcs            = null,
        similarity_type    = "nn",
        balltree_leaf_size = null,
        resolution         = self.graphclust_resolution,
        random_seed        = self.random_seed,
    ) using (
        disabled = ANALYZER_PREFLIGHT.skip,
        volatile = true,
    )

    call COMBINE_CLUSTERING(
        kmeans_h5      = RUN_KMEANS.kmeans_h5,
        kmeans_csv     = RUN_KMEANS.kmeans_csv,
        graphclust_h5  = RUN_GRAPH_CLUSTERING.clusters_h5,
        graphclust_csv = RUN_GRAPH_CLUSTERING.clusters_csv,
    ) using (
        disabled = ANALYZER_PREFLIGHT.skip,
        volatile = true,
    )

    call RUN_DIFFERENTIAL_EXPRESSION_NG as RUN_DIFFERENTIAL_EXPRESSION(
        matrix_h5        = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        clustering_h5    = COMBINE_CLUSTERING.clustering_h5,
        random_seed      = self.random_seed,
        max_clusters     = self.max_clusters,
        is_antibody_only = ANALYZER_PREFLIGHT.is_antibody_only,
    ) using (
        disabled = ANALYZER_PREFLIGHT.skip,
        volatile = true,
    )

    # The int pipeline input tsne_perplexity is bound to the float
    # `perplexity` parameter of RUN_TSNE_NG.
    call RUN_TSNE_NG as RUN_TSNE(
        matrix_h5        = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        pca_h5           = CHOOSE_DIMENSION_REDUCTION_OUTPUT.pca_h5,
        random_seed      = self.random_seed,
        perplexity       = self.tsne_perplexity,
        input_pcs        = self.tsne_input_pcs,
        max_dims         = self.tsne_max_dims,
        max_iter         = self.tsne_max_iter,
        stop_lying_iter  = self.tsne_stop_lying_iter,
        mom_switch_iter  = self.tsne_mom_switch_iter,
        theta            = self.tsne_theta,
        is_antibody_only = ANALYZER_PREFLIGHT.is_antibody_only,
    ) using (
        disabled = ANALYZER_PREFLIGHT.skip,
        volatile = true,
    )

    call RUN_UMAP_NG as RUN_UMAP(
        matrix_h5        = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        pca_h5           = CHOOSE_DIMENSION_REDUCTION_OUTPUT.pca_h5,
        random_seed      = self.random_seed,
        n_neighbors      = self.umap_n_neighbors,
        input_pcs        = self.umap_input_pcs,
        max_dims         = self.umap_max_dims,
        min_dist         = self.umap_min_dist,
        metric           = self.umap_metric,
        is_antibody_only = ANALYZER_PREFLIGHT.is_antibody_only,
    ) using (
        disabled = ANALYZER_PREFLIGHT.skip,
        volatile = true,
    )

    # Collects every intermediate result into the final analysis outputs.
    call SUMMARIZE_ANALYSIS(
        matrix_h5                     = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        pca_h5                        = CHOOSE_DIMENSION_REDUCTION_OUTPUT.pca_h5,
        clustering_h5                 = COMBINE_CLUSTERING.clustering_h5,
        diffexp_h5                    = RUN_DIFFERENTIAL_EXPRESSION.diffexp_h5,
        tsne_h5                       = RUN_TSNE.tsne_h5,
        umap_h5                       = RUN_UMAP.umap_h5,
        pca_csv                       = CHOOSE_DIMENSION_REDUCTION_OUTPUT.pca_csv,
        clustering_csv                = COMBINE_CLUSTERING.clustering_csv,
        diffexp_csv                   = RUN_DIFFERENTIAL_EXPRESSION.diffexp_csv,
        tsne_csv                      = RUN_TSNE.tsne_csv,
        umap_csv                      = RUN_UMAP.umap_csv,
        multi_genome_summary          = RUN_MULTIGENOME_ANALYSIS.multi_genome_summary,
        multi_genome_csv              = RUN_MULTIGENOME_ANALYSIS.multi_genome_csv,
        multi_genome_json             = RUN_MULTIGENOME_ANALYSIS.multi_genome_json,
        is_multi_genome               = PREPROCESS_MATRIX.is_multi_genome,
        chemistry_batch_correction    = self.chemistry_batch_correction,
        batch_score_before_correction = CORRECT_CHEMISTRY_BATCH.batch_score_before_correction,
        batch_score_after_correction  = CORRECT_CHEMISTRY_BATCH.batch_score_after_correction,
    ) using (
        disabled = ANALYZER_PREFLIGHT.skip,
    )

    return (
        analysis         = SUMMARIZE_ANALYSIS.analysis,
        analysis_csv     = SUMMARIZE_ANALYSIS.analysis_csv,
        cloupe_matrix_h5 = PREPROCESS_MATRIX.cloupe_matrix_h5,
        summary          = SUMMARIZE_ANALYSIS.summary,
    )
}

#
# @include "_common_cloupe_stages.mro"
#

# Python stage that produces a `cloupe` file (`output_for_cloupe`) and a
# `gem_group_index_json` output from the analysis directory, the filtered
# matrix and a range of optional metadata inputs, including image/spatial
# inputs (tissue images, DZI tiles, scale factors, fiducial and tissue
# positions).
# `gem_group_index_json` is both an input and an output.
# Declared with a split section that has no chunk-level parameters.
stage CLOUPE_PREPROCESS(
    in  string pipestance_type,
    in  string sample_id,
    in  string sample_desc,
    in  path   analysis,
    in  h5     filtered_gene_bc_matrices_h5,
    in  json   metrics_json,
    in  csv    aggregation_csv,
    in  json   gem_group_index_json,
    in  path[] tissue_image_paths,
    in  int    dark_images,
    in  csv    tissue_positions_list,
    in  txt    fiducial_positions_list,
    in  json   dzi_info,
    in  path[] dzi_tiles_paths,
    in  json   scale_factors_json,
    in  bool   no_secondary_analysis,
    in  string barcode_whitelist,
    in  json   loupe_map,
    in  string product_type,
    in  json   cells_per_tag,
    in  json   cells_per_protospacer,
    in  csv    spatial_enrichment,
    out cloupe output_for_cloupe,
    out json   gem_group_index_json,
    src py     "../rna/stages/cloupe/cloupe_preprocess",
) split (
) using (
    volatile = strict,
)

#
# @include "_cr_aggr_stages.mro"
#

# Compiled (cr_aggr) stage that takes the VDJ libraries and a count
# gem-well map and returns a `receptor` string and a `gem_well_map`.
# Both outputs match inputs of SETUP_VDJ_AGGR by name and type.
stage PROCESS_VDJ_PROTO(
    in  VdjAggrCsvLibrary[] libraries,
    in  map                 count_gem_well_map,
    out string              receptor,
    out map                 gem_well_map,
    src comp                "cr_aggr martian process_vdj_proto",
)

# Compiled (cr_aggr) stage that prepares the inputs for enclone aggregation.
# Its first four outputs (`contig_ann_json_files`, `enclone_input_csv`,
# `enclone_gem_well_meta`, `vdj_reference_path`) match the inputs of
# RUN_ENCLONE_AGGR by name and type.
# Split stage: each chunk takes a `chunk_id` and emits an annotation JSON
# plus two maps (`enclone_meta_row`, `enclone_gem_well_info`).
stage SETUP_VDJ_AGGR(
    in  VdjAggrCsvLibrary[] libraries,
    in  map                 gem_well_map,
    in  string              receptor,
    out json[]              contig_ann_json_files,
    out csv                 enclone_input_csv,
    out em.json             enclone_gem_well_meta,
    out path                vdj_reference_path,
    out json                combined_ann_json,
    src comp                "cr_aggr martian setup_vdj_aggr",
) split (
    in  int                 chunk_id,
    out json                chunk_ann_json,
    out map                 enclone_meta_row,
    out map                 enclone_gem_well_info,
)

# Compiled (cr_aggr) stage that consumes the files prepared by SETUP_VDJ_AGGR
# and writes a protobuf `enclone_output` and a donor reference FASTA.
# Requests 9 GB of memory and 4 threads by default.
stage RUN_ENCLONE_AGGR(
    in  json[]  contig_ann_json_files,
    in  csv     enclone_input_csv,
    in  em.json enclone_gem_well_meta,
    in  path    vdj_reference_path,
    out pb      enclone_output,
    out fa      donor_ref_fa,
    src comp    "cr_aggr martian run_enclone_aggr",
) using (
    mem_gb  = 9,
    threads = 4,
)

# Compiled (cr_aggr) stage that parses the aggregation CSV.
# Outputs an `aggregation_csv` of the same name as the input, the count
# libraries (`map[]`), the VDJ inputs, and two flags that indicate whether
# count / VDJ aggregation should be disabled.
# SC_RNA_REANALYZER_CS calls it with pipestance_root = "." and uses
# `disable_count_aggr` to disable REANALYZE_VERIFY_SAMPLE_IDS.
stage PARSE_AGGR_CSV(
    in  path           pipestance_root,
    in  csv            aggregation_csv,
    out csv            aggregation_csv,
    out map[]          count_libraries,
    out VdjAggrInput[] vdj_aggr_inputs,
    out bool           disable_count_aggr,
    out bool           disable_vdj_aggr,
    src comp           "cr_aggr martian parse_aggr_csv",
)

# Compiled (cr_aggr) stage that writes a protobuf `vdj_contig_info` file
# from the VDJ reference, contig annotations, metrics summary and sample
# metadata.
# NOTE(review): the output name matches the `vdj_contig_info` field of
# VdjAggrCsvLibrary (typed `path` there) -- presumably this is the file later
# supplied to aggregation; confirm.
stage WRITE_CONTIG_PROTO(
    in  path   vdj_reference_path,
    in  json   contig_annotations_json,
    in  json   metrics_summary_json,
    in  string receptor,
    in  int[]  gem_wells,
    in  json   cell_barcodes,
    in  string sample_id,
    in  string sample_desc,
    in  string multi_config_sha,
    out pb     vdj_contig_info,
    src comp   "cr_aggr martian write_contig_proto",
)

# Compiled (cr_aggr) stage that takes parallel arrays of per-receptor VDJ
# aggregation outputs and returns them as two VdjAggrResults structs
# (`vdj_t_results`, `vdj_b_results`) plus a single `vdj_reference_path`.
# NOTE(review): presumably the arrays are matched to the T / B result slots
# by the corresponding entry in `receptors` -- confirm in the stage code.
stage MATCH_VDJ_AGGR_OUTS(
    in  string[]       receptors,
    in  csv[]          clonotypes,
    in  fa[]           donor_ref_fas,
    in  fasta[]        consensus_fastas,
    in  path[]         vdj_reference_paths,
    in  csv[]          filtered_contig_annotations_csvs,
    in  csv[]          consensus_annotations_csvs,
    in  json[]         web_summary_data,
    in  vloupe[]       vloupes,
    out VdjAggrResults vdj_t_results,
    out VdjAggrResults vdj_b_results,
    out path           vdj_reference_path,
    src comp           "cr_aggr martian match_vdj_outs",
)

# Compiled (cr_aggr) stage that takes the enclone gem-well metadata and an
# annotation CSV and writes `augmented_annotation_csv`.
stage WRITE_AGGR_ANN(
    in  em.json enclone_gem_well_meta,
    in  csv     annotation_csv,
    out csv     augmented_annotation_csv,
    src comp    "cr_aggr martian write_aggr_ann",
)

# Compiled (cr_aggr) stage that produces two JSON files,
# `web_summary_content` and `per_origin_hist`, from the enclone output, the
# library list, the clonotypes CSV and sample metadata.
stage WRITE_WEB_SUMMARY_JSON(
    in  path                vdj_reference_path,
    in  VdjAggrCsvLibrary[] libraries,
    in  pb                  enclone_output,
    in  em.json             enclone_gem_well_meta,
    in  string              sample_id,
    in  string              sample_desc,
    in  csv                 clonotypes_csv,
    in  string              receptor,
    out json                web_summary_content,
    out json                per_origin_hist,
    src comp                "cr_aggr martian write_ws_json",
)

#
# @include "_sc_rna_aggregator_stages.mro"
#

# Python preflight stage for aggregation. It takes the sample definitions,
# the normalization mode and the `is_pd` flag, and declares no outputs.
stage AGGREGATOR_PREFLIGHT(
    in  map[]  sample_defs,
    in  string normalization_mode,
    in  bool   is_pd,
    src py     "../rna/stages/aggregator/aggregator_preflight",
) using (
    volatile = strict,
)

# Python aggregator stage that parses the aggregation CSV into sample
# definitions and also emits an `aggregation_csv` of the same name as the
# input.
# The stage code lives in aggregator/parse_csv, distinct from the analyzer's
# PARSE_PARAM_CSV stage (analyzer/parse_csv).
stage PARSE_CSV(
    in  path   pipestance_root,
    in  csv    aggregation_csv,
    in  bool   reanalyze,
    in  h5     matrix_h5,
    in  string product_type,
    out csv    aggregation_csv,
    out map[]  sample_defs,
    src py     "../rna/stages/aggregator/parse_csv",
) using (
    volatile = strict,
)

# Python stage that maps the incoming sample definitions to
# `updated_sample_defs`.
# Split stage: each chunk receives one `sample_def` together with a
# `mol_h5_version` and returns an `updated_sample_def`.
stage CHECK_MOLECULE_INFO_VERSION(
    in  map[]  sample_defs,
    in  string product_type,
    out map[]  updated_sample_defs,
    src py     "../rna/stages/aggregator/check_molecule_info_version",
) split (
    in  int    mol_h5_version,
    in  map    sample_def,
    out map    updated_sample_def,
) using (
    volatile = strict,
)

# Python stage that derives the gem-group index (as a map and as a JSON
# file), the library list and the `chemistry_batch_correction` flag from the
# sample definitions.
# SC_RNA_REANALYZER_CS passes `libraries` and `chemistry_batch_correction`
# on to SC_RNA_ANALYZER_NG.
stage SETUP_SAMPLES(
    in  map[] sample_defs,
    out map   gem_group_index,
    out map[] libraries,
    out json  gem_group_index_json,
    out bool  chemistry_batch_correction,
    src py    "../rna/stages/aggregator/setup_samples",
) using (
    volatile = strict,
)

# Python stage that produces a single `merged_molecules` h5 file and a
# `gem_group_barcode_ranges` map from the sample definitions and libraries.
# Split stage: each chunk handles one `sample_def` and outputs trimmed
# molecules, trimmed barcodes and a `sample_def` (same name as its input).
stage MERGE_MOLECULES(
    in  map[] sample_defs,
    in  map[] libraries,
    out h5    merged_molecules,
    out map   gem_group_barcode_ranges,
    src py    "../rna/stages/aggregator/merge_molecules",
) split (
    in  map   sample_def,
    out path  trimmed_molecules,
    out h5    trimmed_barcodes,
    out map   sample_def,
) using (
    volatile = strict,
)

# Python depth-normalization stage.
# Produces lists of raw and filtered matrix h5 files with their non-zero
# counts (`raw_nnz`, `filtered_nnz`) and a JSON summary; these outputs match
# inputs of WRITE_MATRICES by name and type.
# Split stage: each chunk covers a range given by `chunk_start` /
# `chunk_len` and emits one raw and one filtered matrix plus a chunk summary.
# Requests 4 GB of memory by default.
stage NORMALIZE_DEPTH(
    in  map     gem_group_index,
    in  h5      molecules,
    in  string  normalization_mode,
    in  map     gem_group_barcode_ranges,
    in  float   targeted_depth_factor,
    out h5[]    raw_matrices_h5,
    out int     raw_nnz,
    out h5[]    filtered_matrices_h5,
    out int     filtered_nnz,
    out json    summary,
    src py      "../rna/stages/aggregator/normalize_depth",
) split (
    in  float[] frac_reads_kept,
    in  int[]   num_cells,
    in  int     chunk_start,
    in  int     chunk_len,
    out json    chunk_summary,
    out h5      raw_matrix_h5,
    out h5      filtered_matrix_h5,
) using (
    mem_gb   = 4,
    volatile = strict,
)

# Python stage that takes the per-chunk raw and filtered matrices (as
# produced by NORMALIZE_DEPTH) and writes single raw and filtered matrix h5
# files, a filtered MEX directory, a barcode summary h5 and a JSON summary.
# `summary` is both an input and an output.
# Declared with a split section that has no chunk-level parameters.
stage WRITE_MATRICES(
    in  map[] sample_defs,
    in  map   gem_group_index,
    in  h5    molecules,
    in  h5[]  raw_matrices_h5,
    in  int   raw_nnz,
    in  h5[]  filtered_matrices_h5,
    in  int   filtered_nnz,
    in  json  summary,
    in  bool  is_pd,
    out h5    raw_matrix_h5,
    out h5    filtered_matrix_h5,
    out path  filtered_matrix_mex,
    out h5    barcode_summary_h5,
    out json  summary,
    src py    "../rna/stages/aggregator/write_matrices",
) split (
) using (
    volatile = strict,
)

# Python stage that takes the input sample definitions and the merged raw
# matrix and emits a JSON summary.
# NOTE(review): which invariants are checked is defined in the stage code,
# not in this file.
# Declared with a split section that has no chunk-level parameters.
stage CHECK_INVARIANTS(
    in  map[] input_sample_defs,
    in  h5    merged_raw_gene_bc_matrices_h5,
    out json  summary,
    src py    "../rna/stages/aggregator/check_invariants",
) split (
) using (
    volatile = strict,
)

# Python stage that builds the aggregation report outputs: a JSON summary,
# an HTML web summary and a `web_summary_data` JSON, from the filtered
# matrix, the analysis directory and the normalize-depth / analyzer
# summaries.
# Declared with a split section that has no chunk-level parameters.
stage SUMMARIZE_AGGREGATED_REPORTS(
    in  string sample_id,
    in  string sample_desc,
    in  map    gem_group_index,
    in  h5     filtered_matrices_h5,
    in  path   analysis,
    in  json   normalize_depth_summary,
    in  json   analyze_matrices_summary,
    in  string product_type,
    out json   summary,
    out html   web_summary,
    out json   web_summary_data,
    src py     "../rna/stages/aggregator/summarize_aggregated_reports",
) split (
) using (
    volatile = strict,
)

#
# @include "rna/sc_rna_reanalyzer_cs.mro"
#

# Customer-facing reanalysis pipeline: re-runs secondary analysis on an
# existing feature-barcode matrix and produces a web summary, a Loupe file,
# and copies of the matrix (MEX directory and HDF5).
#
# Inputs:
#   sample_id, sample_desc - forwarded to SUMMARIZE_REANALYSIS and
#                            CLOUPE_PREPROCESS.
#   filtered_matrices_h5   - matrix to analyze; checked by the preflight stage
#                            and passed to the analyzer.  Per the note on the
#                            analyzer call, this may actually be a raw matrix
#                            when force_cells is used.
#   params_csv             - analysis parameter overrides, parsed by
#                            PARSE_PARAM_CSV.
#   aggregation_csv        - aggregation CSV, parsed by PARSE_AGGR_CSV and also
#                            handed to CLOUPE_PREPROCESS.
#   barcodes_csv           - bound to the analyzer's use_bcs.
#   genes_csv              - bound to the analyzer's use_genes.
#   exclude_genes_csv      - bound to the analyzer's exclude_genes.
#   force_cells            - forwarded unchanged to the analyzer.
#
# Outputs are described by the display strings on each `out` declaration.
pipeline SC_RNA_REANALYZER_CS(
    in  string sample_id,
    in  string sample_desc,
    in  h5     filtered_matrices_h5,
    in  csv    params_csv,
    in  csv    aggregation_csv,
    in  csv    barcodes_csv,
    in  csv    genes_csv,
    in  csv    exclude_genes_csv,
    in  int    force_cells,
    out path   analysis                       "Secondary analysis output CSV",
    out html   web_summary                    "Secondary analysis web summary",
    out csv    params                         "Copy of the input parameter CSV",
    out csv    aggregation                    "Copy of the input aggregation CSV",
    out cloupe cloupe                         "Loupe Browser file",
    out path   filtered_feature_bc_matrix     "Filtered feature-barcode matrices MEX",
    out h5     filtered_feature_bc_matrix_h5  "Filtered feature-barcode matrices HDF5"  "filtered_feature_bc_matrix.h5",
)
{
    # Runs as a preflight check on the input matrix only; none of its outputs
    # are consumed by the rest of this pipeline.
    call REANALYZER_PREFLIGHT(
        filtered_matrices_h5 = self.filtered_matrices_h5,
    ) using (
        preflight = true,
    )

    # Supplies the per-parameter analysis settings bound on the analyzer call
    # below, plus the params_csv copy returned as the `params` output.
    call PARSE_PARAM_CSV(
        params_csv = self.params_csv,
    )

    # Supplies count_libraries, the disable_count_aggr flag, and the
    # aggregation_csv copy returned as the `aggregation` output.
    call PARSE_AGGR_CSV(
        pipestance_root = ".",
        aggregation_csv = self.aggregation_csv,
    )

    # Disabled whenever PARSE_AGGR_CSV reports disable_count_aggr.
    # NOTE(review): presumably that is the case when no aggregation CSV is
    # supplied - confirm against PARSE_AGGR_CSV.
    call REANALYZE_VERIFY_SAMPLE_IDS(
        matrix_h5   = self.filtered_matrices_h5,
        sample_defs = PARSE_AGGR_CSV.count_libraries,
    ) using (
        disabled = PARSE_AGGR_CSV.disable_count_aggr,
    )

    # Consumes the sample_defs emitted by the verification stage; provides the
    # library info and chemistry_batch_correction flag used by the analyzer.
    call SETUP_SAMPLES(
        sample_defs = REANALYZE_VERIFY_SAMPLE_IDS.sample_defs,
    ) using (
        volatile = true,
    )

    # Secondary analysis.  The callee is SC_RNA_ANALYZER_NG, aliased to
    # SC_RNA_ANALYZER so later references use the shorter name.  Most tuning
    # parameters come from PARSE_PARAM_CSV; graphclust_resolution is the one
    # clustering parameter bound to null rather than to a parsed value.
    call SC_RNA_ANALYZER_NG as SC_RNA_ANALYZER(
        # don't support barnyard re-analysis (but see note below)
        # NOTE: if using force_cells, this might actually be the raw matrix
        filtered_matrices_h5       = self.filtered_matrices_h5,
        no_secondary_analysis      = false,
        aggr_library_info          = SETUP_SAMPLES.libraries,
        use_genes                  = self.genes_csv,
        exclude_genes              = self.exclude_genes_csv,
        use_bcs                    = self.barcodes_csv,
        num_analysis_bcs           = PARSE_PARAM_CSV.num_analysis_bcs,
        random_seed                = PARSE_PARAM_CSV.random_seed,
        num_pca_bcs                = PARSE_PARAM_CSV.num_pca_bcs,
        num_pca_genes              = PARSE_PARAM_CSV.num_pca_genes,
        num_principal_comps        = PARSE_PARAM_CSV.num_principal_comps,
        chemistry_batch_correction = SETUP_SAMPLES.chemistry_batch_correction,
        cbc_knn                    = PARSE_PARAM_CSV.cbc_knn,
        cbc_alpha                  = PARSE_PARAM_CSV.cbc_alpha,
        cbc_sigma                  = PARSE_PARAM_CSV.cbc_sigma,
        cbc_realign_panorama       = PARSE_PARAM_CSV.cbc_realign_panorama,
        max_clusters               = PARSE_PARAM_CSV.max_clusters,
        graphclust_neighbors       = PARSE_PARAM_CSV.graphclust_neighbors,
        neighbor_a                 = PARSE_PARAM_CSV.neighbor_a,
        neighbor_b                 = PARSE_PARAM_CSV.neighbor_b,
        graphclust_resolution      = null,
        tsne_perplexity            = PARSE_PARAM_CSV.tsne_perplexity,
        tsne_input_pcs             = PARSE_PARAM_CSV.tsne_input_pcs,
        tsne_max_dims              = PARSE_PARAM_CSV.tsne_max_dims,
        tsne_max_iter              = PARSE_PARAM_CSV.tsne_max_iter,
        tsne_stop_lying_iter       = PARSE_PARAM_CSV.tsne_stop_lying_iter,
        tsne_mom_switch_iter       = PARSE_PARAM_CSV.tsne_mom_switch_iter,
        tsne_theta                 = PARSE_PARAM_CSV.tsne_theta,
        umap_n_neighbors           = PARSE_PARAM_CSV.umap_n_neighbors,
        umap_input_pcs             = PARSE_PARAM_CSV.umap_input_pcs,
        umap_max_dims              = PARSE_PARAM_CSV.umap_max_dims,
        umap_min_dist              = PARSE_PARAM_CSV.umap_min_dist,
        umap_metric                = PARSE_PARAM_CSV.umap_metric,
        force_cells                = self.force_cells,
        skip_multigenome_analysis  = false,
    )

    # Builds the web summary and the MEX matrix directory from the analyzer's
    # cloupe_matrix_h5 (not from the pipeline's input matrix).
    call SUMMARIZE_REANALYSIS(
        sample_id                = self.sample_id,
        sample_desc              = self.sample_desc,
        filtered_matrices        = SC_RNA_ANALYZER.cloupe_matrix_h5,
        analysis                 = SC_RNA_ANALYZER.analysis,
        analyze_matrices_summary = SC_RNA_ANALYZER.summary,
    )

    # Produces the Loupe file.  Only the analysis, matrix, sample identity and
    # aggregation CSV are supplied; metrics, gem-group index, image/position/
    # DZI inputs, whitelist, loupe_map and the cells_per_*/spatial_enrichment
    # inputs are all bound to null, and product_type is fixed to "sc".
    call CLOUPE_PREPROCESS(
        pipestance_type              = "SC_RNA_REANALYZER_CS",
        sample_id                    = self.sample_id,
        sample_desc                  = self.sample_desc,
        analysis                     = SC_RNA_ANALYZER.analysis,
        filtered_gene_bc_matrices_h5 = SC_RNA_ANALYZER.cloupe_matrix_h5,
        metrics_json                 = null,
        aggregation_csv              = self.aggregation_csv,
        gem_group_index_json         = null,
        tissue_image_paths           = null,
        dark_images                  = null,
        tissue_positions_list        = null,
        fiducial_positions_list      = null,
        dzi_info                     = null,
        dzi_tiles_paths              = null,
        scale_factors_json           = null,
        no_secondary_analysis        = false,
        barcode_whitelist            = null,
        loupe_map                    = null,
        product_type                 = "sc",
        cells_per_tag                = null,
        cells_per_protospacer        = null,
        spatial_enrichment           = null,
    )

    # Note that `analysis` is bound to the analyzer's analysis_csv output,
    # while the downstream stages above consume its `analysis` output.
    return (
        analysis                      = SC_RNA_ANALYZER.analysis_csv,
        web_summary                   = SUMMARIZE_REANALYSIS.web_summary,
        params                        = PARSE_PARAM_CSV.params_csv,
        aggregation                   = PARSE_AGGR_CSV.aggregation_csv,
        cloupe                        = CLOUPE_PREPROCESS.output_for_cloupe,
        filtered_feature_bc_matrix    = SUMMARIZE_REANALYSIS.feature_bc_matrix_mex,
        filtered_feature_bc_matrix_h5 = SC_RNA_ANALYZER.cloupe_matrix_h5,
    )
}

#
# @include "__dimitri_force_5000.mro"
#

# Top-level invocation for the "dimitri_force_5000" run: reanalyzes a raw
# feature-barcode matrix with force_cells set to 5000.  No parameter,
# aggregation, barcode, gene, or gene-exclusion CSV is supplied (all null).
# NOTE(review): presumably the null CSVs leave the analysis on its default
# parameters with no barcode/gene filtering - confirm against the parse stages.
call SC_RNA_REANALYZER_CS(
    sample_id            = "dimitri_force_5000",
    sample_desc          = "",
    filtered_matrices_h5 = "/scratch/etanis/ST-118/results/outs/raw_feature_bc_matrix.h5",
    params_csv           = null,
    aggregation_csv      = null,
    barcodes_csv         = null,
    genes_csv            = null,
    exclude_genes_csv    = null,
    force_cells          = 5000,
)
