// writeSparseMatrixToHdf5.js

import * as h5 from "./hdf5.js";
import * as wasm from "./wasm.js";
import * as utils from "./utils.js";

/**
 * Write a sparse {@linkplain ScranMatrix} into an HDF5 file, in the form of its compressed sparse components.
 * This can be considered the reverse operation of {@linkcode initializeSparseMatrixFromHDF5}.
 *
 * @param {ScranMatrix} x - An input sparse matrix.
 * @param {string} path - Path to the HDF5 file.
 * A new file will be created if no file is present.
 * @param {string} name - Name of the group inside the HDF5 file in which to save `x`.
 * @param {object} [options={}] - Optional parameters.
 * @param {string} [options.format="tenx_matrix"] - Format to use for saving `x`.
 * This can be one of:
 *
 * - `tenx_matrix`, a compressed sparse column layout where the dimensions are stored in the `shape` dataset.
 * - `csr_matrix`, a compressed sparse column (yes, column) layout where the dimensions are stored in the `shape` attribute of the HDF5 group.
 *   The discrepancy between the name and the layout is a consequence of the original framework operating on the transposed matrix (i.e., features in columns).
 * - `csc_matrix`, a compressed sparse row layout where the dimensions are stored in the `shape` attribute of the group.
 *   Discrepancy is for the same reason as described for `csr_matrix`.
 *
 * @param {boolean} [options.forceInteger=false] - Whether to force non-integer values in `x` to be coerced to integers.
 * @param {boolean} [options.saveShape=true] - Whether to save the dimensions of `x`.
 * This will be stored as a `shape` dataset (for `format = "tenx_matrix"`) or attribute (otherwise) in the specified `name`.
 * For the `csr_matrix` and `csc_matrix` formats, an `encoding-type` attribute holding the format name is written alongside the `shape` attribute.
 * If `false`, the dimensions are not saved into `name`.
 * @param {boolean} [options.overwrite=true] - Whether to overwrite an existing HDF5 file at `path`.
 * If `false`, any existing file will be opened in read-write mode, and `x` will be saved into `name` in that file.
 *
 * @return {undefined} Nothing is returned; `x` is written to `path` at `name`.
 * @throws {Error} If `options.format` is not exactly one of the supported format strings.
 * This is checked before anything is written.
 */
export function writeSparseMatrixToHdf5(x, path, name, options = {}) {
    const { format = "tenx_matrix", forceInteger = false, saveShape = true, overwrite = true, ...others } = options;
    utils.checkOtherOptions(others);

    // Resolve the layout exactly once, up front. `switch` compares strictly, so only the
    // documented strings are accepted, and an unsupported format is rejected before any write.
    let csc;
    switch (format) {
        case "tenx_matrix":
        case "csr_matrix": // yes, column-major: H5AD transposes everything, so its "csr" is our compressed sparse column.
            csc = true;
            break;
        case "csc_matrix": // the flip is deliberate for the same reason; rows are columns in H5AD.
            csc = false;
            break;
        default:
            throw new Error(`unknown format '${String(format)}'`);
    }

    wasm.call(module => module.write_sparse_matrix_to_hdf5(x.matrix, path, name, csc, forceInteger, overwrite));

    if (!saveShape) {
        return;
    }

    const handle = new h5.H5Group(path, name);
    const numRows = x.numberOfRows();
    const numCols = x.numberOfColumns();

    if (format === "tenx_matrix") {
        handle.writeDataSet("shape", "Int32", null, [numRows, numCols]);
    } else {
        // `format` can only be "csr_matrix" or "csc_matrix" here, and doubles as the encoding tag.
        handle.writeAttribute("encoding-type", "String", null, format);
        // Dimensions are stored reversed because H5AD transposes everything, and so must we.
        handle.writeAttribute("shape", "Int32", null, [numCols, numRows]);
    }
}