{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Interoperability\n", "\n", "This notebook shows some ways that you can import data into and export data from `spatialproteomics`." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%reload_ext autoreload\n", "%autoreload 2\n", "\n", "import spatialproteomics as sp\n", "import pandas as pd\n", "import xarray as xr\n", "import os\n", "import shutil\n", "import anndata\n", "xr.set_options(display_style='text')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Importing Data\n", "\n", "In the example workflow, you have already seen how to read data from a tiff file. If you already have your data in `spatialdata` format, you can also read it in from there. " ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "root_attr: multiscales\n", "root_attr: omero\n", "datasets [{'coordinateTransformations': [{'scale': [1.0, 1.0, 1.0], 'type': 'scale'}], 'path': '0'}]\n", "resolution: 0\n", " - shape ('c', 'y', 'x') = (3, 768, 1024)\n", " - chunks = ['3', '768', '1024']\n", " - dtype = uint8\n", "root_attr: multiscales\n", "root_attr: omero\n" ] }, { "data": { "text/html": [ "
<xarray.Dataset>\n",
       "Dimensions:        (channels: 3, y: 768, x: 1024, cells: 70, features: 2)\n",
       "Coordinates:\n",
       "  * channels       (channels) int64 0 1 2\n",
       "  * y              (y) int64 0 1 2 3 4 5 6 7 ... 760 761 762 763 764 765 766 767\n",
       "  * x              (x) int64 0 1 2 3 4 5 6 ... 1018 1019 1020 1021 1022 1023\n",
       "  * cells          (cells) int64 1 2 3 4 5 6 7 8 9 ... 63 64 65 66 67 68 69 70\n",
       "  * features       (features) <U10 'centroid-0' 'centroid-1'\n",
       "Data variables:\n",
       "    _image         (channels, y, x) uint8 dask.array<chunksize=(3, 768, 1024), meta=np.ndarray>\n",
       "    _segmentation  (y, x) int64 0 0 0 0 0 0 0 0 0 ... 69 69 69 69 69 69 69 69 69\n",
       "    _obs           (cells, features) float64 44.79 402.5 46.1 ... 736.5 890.5
" ], "text/plain": [ "\n", "Dimensions: (channels: 3, y: 768, x: 1024, cells: 70, features: 2)\n", "Coordinates:\n", " * channels (channels) int64 0 1 2\n", " * y (y) int64 0 1 2 3 4 5 6 7 ... 760 761 762 763 764 765 766 767\n", " * x (x) int64 0 1 2 3 4 5 6 ... 1018 1019 1020 1021 1022 1023\n", " * cells (cells) int64 1 2 3 4 5 6 7 8 9 ... 63 64 65 66 67 68 69 70\n", " * features (features) \n", " _segmentation (y, x) int64 0 0 0 0 0 0 0 0 0 ... 69 69 69 69 69 69 69 69 69\n", " _obs (cells, features) float64 44.79 402.5 46.1 ... 736.5 890.5" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# reading the example spatialdata store, selecting the image via image_key='raccoon'\n", "ds = sp.read_from_spatialdata(\n", "    '../../data/spatialdata_example.zarr',\n", "    image_key='raccoon',\n", ")\n", "ds" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Exporting Data\n", "\n", "Once you are happy with your analysis, you will likely want to export the results. The easiest way to do this is by using the `zarr` format, but `csv`, `anndata`, and `spatialdata` are also supported." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/meyerben/meyerben/.conda/envs/spatialproteomics_env/lib/python3.9/site-packages/xarray/backends/plugins.py:159: RuntimeWarning: 'netcdf4' fails while guessing\n", " warnings.warn(f\"{engine!r} fails while guessing\", RuntimeWarning)\n", "/home/meyerben/meyerben/.conda/envs/spatialproteomics_env/lib/python3.9/site-packages/xarray/backends/plugins.py:159: RuntimeWarning: 'scipy' fails while guessing\n", " warnings.warn(f\"{engine!r} fails while guessing\", RuntimeWarning)\n" ] }, { "data": { "text/html": [ "
<xarray.Dataset>\n",
       "Dimensions:                         (cells: 12560, channels: 56, y: 3000,\n",
       "                                     x: 3000, labels: 9, props: 2, features: 7)\n",
       "Coordinates:\n",
       "  * cells                           (cells) int64 1 2 3 4 ... 12558 12559 12560\n",
       "  * channels                        (channels) <U11 'DAPI' 'Helios' ... 'Ki-67'\n",
       "  * features                        (features) <U14 'CD3_binarized' ... 'cent...\n",
       "  * labels                          (labels) int64 1 2 3 4 5 6 7 8 9\n",
       "  * props                           (props) <U6 '_color' '_name'\n",
       "  * x                               (x) int64 0 1 2 3 4 ... 2996 2997 2998 2999\n",
       "  * y                               (y) int64 0 1 2 3 4 ... 2996 2997 2998 2999\n",
       "Data variables:\n",
       "    _arcsinh_mean                   (cells, channels) float64 3.111 ... 0.4174\n",
       "    _arcsinh_sum                    (cells, channels) float64 8.346 ... 5.224\n",
       "    _image                          (channels, y, x) uint8 4 4 4 4 5 ... 2 2 2 2\n",
       "    _labels                         (labels, props) object '#C8A1A1' 'B' ... 'T'\n",
       "    _obs                            (cells, features) float64 1.0 ... 2.237e+03\n",
       "    _percentage_positive_intensity  (cells, channels) float64 1.0 0.0 ... 1.0\n",
       "    _raw_mean                       (cells, channels) float64 56.02 ... 2.148\n",
       "    _raw_sum                        (cells, channels) float64 1.053e+04 ... 4...\n",
       "    _segmentation                   (y, x) int64 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0
" ], "text/plain": [ "\n", "Dimensions: (cells: 12560, channels: 56, y: 3000,\n", " x: 3000, labels: 9, props: 2, features: 7)\n", "Coordinates:\n", " * cells (cells) int64 1 2 3 4 ... 12558 12559 12560\n", " * channels (channels) " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# single source of truth for the store location, shared by the cleanup and the export\n", "zarr_path = \"tmp.zarr\"\n", "\n", "# removing the zarr if it exists, so that re-running this cell does not fail on an existing store\n", "if os.path.exists(zarr_path):\n", "    shutil.rmtree(zarr_path)\n", "\n", "# exporting as zarr (to the same path that was cleaned up above)\n", "ds.drop_encoding().to_zarr(zarr_path)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Exporting Tables to CSV\n", "Let's say you want to export some tables as csvs. This can be done with pandas." ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DAPIHeliosCD10TCF7/TCF1PD-L1BCL-6FOXP3CD69PerforinCD19...CD68CD31CD45CD3CytokeratinCD45ROCD8Granyzme BCD79aKi-67
13.1113320.01.3910401.5322991.7007920.00.00.0000001.0268240.029783...0.3452290.0000002.0181502.4603420.5959981.7194210.7142880.4282760.5282750.458260
22.8049850.01.1683210.0000001.3953410.00.00.0000000.8472620.002073...1.5592740.0000001.2947620.3031090.6428761.3285940.7992082.0290830.4263440.528429
33.3802200.01.7339450.6665752.0201500.00.00.0669951.3974690.013636...0.8223200.0000001.4121992.1536280.7634252.7678381.0369000.5717460.7273350.497415
42.9872830.01.2975330.6079041.5725710.00.00.0035970.9604720.004317...0.2977400.0000001.2428672.1497490.5835742.4731590.8040460.4252010.4271770.436378
53.1200230.01.5428080.0000001.9285610.00.00.1555371.4630690.010959...0.8723040.0793691.0059960.2121050.8948702.2996420.7433290.5188681.0112880.488958
\n", "

5 rows × 56 columns

\n", "
" ], "text/plain": [ " DAPI Helios CD10 TCF7/TCF1 PD-L1 BCL-6 FOXP3 CD69 \\\n", "1 3.111332 0.0 1.391040 1.532299 1.700792 0.0 0.0 0.000000 \n", "2 2.804985 0.0 1.168321 0.000000 1.395341 0.0 0.0 0.000000 \n", "3 3.380220 0.0 1.733945 0.666575 2.020150 0.0 0.0 0.066995 \n", "4 2.987283 0.0 1.297533 0.607904 1.572571 0.0 0.0 0.003597 \n", "5 3.120023 0.0 1.542808 0.000000 1.928561 0.0 0.0 0.155537 \n", "\n", " Perforin CD19 ... CD68 CD31 CD45 CD3 \\\n", "1 1.026824 0.029783 ... 0.345229 0.000000 2.018150 2.460342 \n", "2 0.847262 0.002073 ... 1.559274 0.000000 1.294762 0.303109 \n", "3 1.397469 0.013636 ... 0.822320 0.000000 1.412199 2.153628 \n", "4 0.960472 0.004317 ... 0.297740 0.000000 1.242867 2.149749 \n", "5 1.463069 0.010959 ... 0.872304 0.079369 1.005996 0.212105 \n", "\n", " Cytokeratin CD45RO CD8 Granyzme B CD79a Ki-67 \n", "1 0.595998 1.719421 0.714288 0.428276 0.528275 0.458260 \n", "2 0.642876 1.328594 0.799208 2.029083 0.426344 0.528429 \n", "3 0.763425 2.767838 1.036900 0.571746 0.727335 0.497415 \n", "4 0.583574 2.473159 0.804046 0.425201 0.427177 0.436378 \n", "5 0.894870 2.299642 0.743329 0.518868 1.011288 0.488958 \n", "\n", "[5 rows x 56 columns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# NOTE(review): the `ds` read from spatialdata_example.zarr earlier in this notebook only lists\n", "# `_image`, `_segmentation` and `_obs`; this cell needs a dataset that contains the\n", "# `_arcsinh_mean` layer -- confirm which dataset has to be loaded before running it\n", "layer_key = \"_arcsinh_mean\"\n", "\n", "# pulling the selected layer out of the dataset as a pandas data frame\n", "df = ds.pp.get_layer_as_df(layer_key)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "# exporting as csv\n", "csv_path = \"tmp.csv\"\n", "df.to_csv(csv_path)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Exporting to AnnData\n", "AnnData is a format used by scanpy, which can be useful to create interesting plots and downstream analyses. For this reason, you can export the xarray object as an AnnData object. Note that this object will only store the tabular data, but not the image or the segmentation layer." 
] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 12560 × 56\n", " obs: 'centroid-0', 'centroid-1', '_labels', '_original_'\n", " uns: 'label_colors'\n", " layers: 'arcsinh_sum', 'raw_mean', 'raw_sum'" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# mapping of anndata layer names to the dataset layers they are filled from\n", "extra_layers = {\"arcsinh_sum\": \"_arcsinh_sum\", \"raw_mean\": \"_raw_mean\", \"raw_sum\": \"_raw_sum\"}\n", "# mapping of anndata uns keys to the dataset layers they are filled from\n", "extra_uns = {\"label_colors\": \"_labels\"}\n", "\n", "# putting the expression matrix into an anndata object\n", "adata = ds.tl.convert_to_anndata(\n", "    expression_matrix_key=\"_arcsinh_mean\",\n", "    additional_layers=extra_layers,\n", "    additional_uns=extra_uns,\n", ")\n", "adata" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "# writing to disk as hdf5\n", "h5ad_path = 'tmp.h5ad'\n", "adata.write(h5ad_path)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Exporting to SpatialData\n", "SpatialData is a data format which is commonly used for spatial omics analysis and combines the power of zarr with anndata. You can export to this data format as well." ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[34mINFO \u001b[0m Transposing `data` of type: \u001b[1m<\u001b[0m\u001b[1;95mclass\u001b[0m\u001b[39m \u001b[0m\u001b[32m'dask.array.core.Array'\u001b[0m\u001b[1m>\u001b[0m to \u001b[1m(\u001b[0m\u001b[32m'c'\u001b[0m, \u001b[32m'y'\u001b[0m, \u001b[32m'x'\u001b[0m\u001b[1m)\u001b[0m. \n", "\u001b[34mINFO \u001b[0m Transposing `data` of type: \u001b[1m<\u001b[0m\u001b[1;95mclass\u001b[0m\u001b[39m \u001b[0m\u001b[32m'dask.array.core.Array'\u001b[0m\u001b[1m>\u001b[0m to \u001b[1m(\u001b[0m\u001b[32m'y'\u001b[0m, \u001b[32m'x'\u001b[0m\u001b[1m)\u001b[0m. 
\n" ] }, { "data": { "text/plain": [ "SpatialData object with:\n", "├── Images\n", "│ └── 'image': SpatialImage[cyx] (56, 3000, 3000)\n", "├── Labels\n", "│ └── 'segmentation': SpatialImage[yx] (3000, 3000)\n", "└── Table\n", " └── AnnData object with n_obs × n_vars = 12560 × 56\n", " obs: 'id', 'region'\n", " uns: 'spatialdata_attrs': AnnData (12560, 56)\n", "with coordinate systems:\n", "▸ 'global', with elements:\n", " image (Images), segmentation (Labels)" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "spatialdata_object = ds.tl.convert_to_spatialdata(expression_matrix_key=\"_arcsinh_mean\")\n", "spatialdata_object" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "root_attr: channels_metadata\n", "root_attr: multiscales\n", "datasets [{'coordinateTransformations': [{'scale': [1.0, 1.0, 1.0], 'type': 'scale'}], 'path': '0'}]\n", "resolution: 0\n", " - shape ('c', 'y', 'x') = (56, 3000, 3000)\n", " - chunks = ['56', '1548 (+ 1452)', '1548 (+ 1452)']\n", " - dtype = uint8\n", "root_attr: image-label\n", "root_attr: multiscales\n", "no parent found for : None\n", "root_attr: image-label\n", "root_attr: multiscales\n", "datasets [{'coordinateTransformations': [{'scale': [1.0, 1.0], 'type': 'scale'}], 'path': '0'}]\n", "resolution: 0\n", " - shape ('y', 'x') = (3000, 3000)\n", " - chunks = ['3000', '3000']\n", " - dtype = int64\n" ] } ], "source": [ "# using a dedicated store, because \"tmp.zarr\" already holds the xarray export written earlier in this notebook\n", "spatialdata_zarr_path = \"tmp_spatialdata.zarr\"\n", "\n", "# removing the store if it exists, so that the cell can be re-run (write does not overwrite by default)\n", "if os.path.exists(spatialdata_zarr_path):\n", "    shutil.rmtree(spatialdata_zarr_path)\n", "\n", "# storing as zarr file\n", "spatialdata_object.write(spatialdata_zarr_path)" ] } ], "metadata": { "kernelspec": { "display_name": "tmp_env", "language": "python", "name": "tmp_env" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" } }, "nbformat": 4, "nbformat_minor": 4 }