diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
deleted file mode 100644
index ade52e5..0000000
--- a/.github/workflows/deploy.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-name: Deploy CellRegMap
-
-on:
- push:
- tags:
- - v*
-
-jobs:
- deploy:
- runs-on: ubuntu-latest
- steps:
- - name: Get tag name
- run: |
- echo ${GITHUB_REF##*/}
-
- # clone the repo to the local directory
- - uses: actions/checkout@v2
-
- - name: Set up Python
- uses: actions/setup-python@v2
- with:
- python-version: "3.8"
-
- # In the future, run the tests here
- # - name: Test with pytest
- # run: |
- # python -m unittest test/
-
- # Deploy to pypi
- - name: Build and publish to pypi
- env:
- TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
- TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
- run: |
- pip install wheel twine
- python setup.py sdist bdist_wheel
- twine upload dist/*
-
- - name: Login to Docker Hub
- uses: docker/login-action@v1
- with:
- username: ${{ secrets.DOCKERHUB_USERNAME }}
- password: ${{ secrets.DOCKERHUB_TOKEN }}
- # Build and deploy the docker
- - name: Build and publish to docker
- run: |
- DOCKER_TAG=annasecuomo/cellregmap:${GITHUB_REF##*/}
- docker build -t $DOCKER_TAG .
- docker push $DOCKER_TAG
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index ffbcc37..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,125 +0,0 @@
-docs/_autosummary
-
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-.python-version
-
-# pipenv
-# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-# However, in case of collaboration, if having platform-specific dependencies or dependencies
-# having no cross-platform support, pipenv may install dependencies that don’t work, or not
-# install all needed dependencies.
-#Pipfile.lock
-
-# celery beat schedule file
-celerybeat-schedule
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index 7eba10b..0000000
--- a/Dockerfile
+++ /dev/null
@@ -1,5 +0,0 @@
-FROM python:3.8.5
-
-ADD cellregmap /app/cellregmap/cellregmap
-ADD setup.py setup.cfg version.py proof.md LICENSE MANIFEST.in /app/cellregmap/
-RUN pip install /app/cellregmap
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 97fb9fb..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2021 Anna Cuomo
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100755
index 762aa52..0000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,3 +0,0 @@
-include LICENSE
-include README.md
-include version.py
diff --git a/NEWS.md b/NEWS.md
new file mode 100644
index 0000000..e8cfda9
--- /dev/null
+++ b/NEWS.md
@@ -0,0 +1,18 @@
+---
+layout: default
+title: NEWS
+---
+
+**`CellRegMap paper out in MSB!`**:
+- CellRegMap is out in Molecular Systems Biology! Check out the [paper](https://www.embopress.org/doi/full/10.15252/msb.202110663) and see here for a [Tweetorial](https://twitter.com/AnnaSECuomo/status/1559805188994580482)!
+
+**`CellRegMap pipeline`**:
+- We have developed a [WDL pipeline](https://github.com/populationgenomics/CellRegMap_pipeline) to facilitate running CellRegMap
+- Download the container from [Docker Hub](https://hub.docker.com/repository/docker/annasecuomo/cellregmap_pipeline)!
+
+**`Anna's presentation of CellRegMap at Biology of Genomes`**:
+- Catch Anna's talk at [this link](https://www.biorxiv.org/content/10.1101/2021.09.01.458524v1#video)!
+
+**`CellRegMap preprint`**:
+- The Cellular Regulatory Map (CellRegMap) is out as a [preprint](https://www.biorxiv.org/content/10.1101/2021.09.01.458524v1)!
+- See our [Twitter thread](https://twitter.com/AnnaSECuomo/status/1434059443956862978) for highlights!
diff --git a/README.md b/README.md
index 1bf02e0..da965af 100644
--- a/README.md
+++ b/README.md
@@ -1,47 +1 @@
-# CellRegMap
-
-Cellular Regulatory Map (CellRegMap) is a linear mixed model approach to perform multi-context eQTL mapping by leveraging single cell RNA sequencing (scRNA-seq) data.
-It is related to the previously proposed [StructLMM](https://www.nature.com/articles/s41588-018-0271-0) but importantly it can account for sample structure, including population structure and repeated observations for the same samples, e.g., multiple cells for the same donor.
-
-The CellRegMap model and its applications to both real and simulated data are described in the [CellRegMap manuscript](https://www.embopress.org/doi/full/10.15252/msb.202110663).
-
-For more instructions and tutorials to facilitate usage of the package visit [our website](https://limix.github.io/CellRegMap/)!
-
-## Install
-
-From your command line, enter
-
- python3 -m pip install cellregmap
-
-in your command line.
-
-## Development
-
-To install it in development mode, enter
-
- git clone https://github.com/limix/CellRegMap.git
- cd CellRegMap
- python3 -m pip install -e .
-
-in your command line.
-
-## Installation using a Docker image
-If you use Docker, you can also pull the [pre-build image from dockerhub](https://hub.docker.com/r/annasecuomo/cellregmap).
-
-
-
+Branch for the [webpage](https://limix.github.io/CellRegMap) associated with our [CellRegMap](https://www.biorxiv.org/content/10.1101/2021.09.01.458524v1) model.
diff --git a/__pycache__/version.cpython-38.pyc b/__pycache__/version.cpython-38.pyc
new file mode 100644
index 0000000..565aebe
Binary files /dev/null and b/__pycache__/version.cpython-38.pyc differ
diff --git a/__pycache__/version.cpython-39.pyc b/__pycache__/version.cpython-39.pyc
new file mode 100644
index 0000000..5cbe6c0
Binary files /dev/null and b/__pycache__/version.cpython-39.pyc differ
diff --git a/_config.yml b/_config.yml
new file mode 100644
index 0000000..04ce92d
--- /dev/null
+++ b/_config.yml
@@ -0,0 +1,6 @@
+theme: jekyll-theme-cayman
+remote_theme: pages-themes/cayman@v0.2.0
+plugins:
+- jekyll-remote-theme # plugin that resolves the `remote_theme` setting above
+# Build settings
+markdown: kramdown
diff --git a/_layouts/default.html b/_layouts/default.html
new file mode 100644
index 0000000..83940de
--- /dev/null
+++ b/_layouts/default.html
@@ -0,0 +1,59 @@
+
+
+
diff --git a/dist/cellregmap-0.0.2.tar.gz b/dist/cellregmap-0.0.2.tar.gz
new file mode 100644
index 0000000..933055c
Binary files /dev/null and b/dist/cellregmap-0.0.2.tar.gz differ
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index d4bb2cb..0000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line, and also
-# from the environment for the first two.
-SPHINXOPTS ?=
-SPHINXBUILD ?= sphinx-build
-SOURCEDIR = .
-BUILDDIR = _build
-
-# Put it first so that "make" without argument is like "make help".
-help:
- @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
- @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/conf.py b/docs/conf.py
deleted file mode 100644
index 566feac..0000000
--- a/docs/conf.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Configuration file for the Sphinx documentation builder.
-#
-# This file only contains a selection of the most common options. For a full
-# list see the documentation:
-# https://www.sphinx-doc.org/en/master/usage/configuration.html
-
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
-
-
-# -- Project information -----------------------------------------------------
-
-project = 'CellRegMap'
-copyright = '2022, Anna Cuomo'
-author = 'Anna Cuomo'
-
-
-# -- General configuration ---------------------------------------------------
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = [
- "sphinx.ext.autodoc",
- "sphinx.ext.autosummary",
- "sphinx.ext.napoleon",
- "sphinx.ext.viewcode",
-]
-
-autodoc_default_flags = ["members"]
-autosummary_generate = True
-napoleon_numpy_docstring = True
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
-
-
-# -- Options for HTML output -------------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-#
-pygments_style = "default"
-html_theme = "sphinx_rtd_theme"
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
diff --git a/docs/functions.rst b/docs/functions.rst
deleted file mode 100644
index 191bcce..0000000
--- a/docs/functions.rst
+++ /dev/null
@@ -1,15 +0,0 @@
-**********
-CellRegMap
-**********
-
-.. currentmodule:: cellregmap
-
-.. autosummary::
- :toctree: _autosummary
- :template: class.rst
-
- CellRegMap
- run_association
- run_interaction
- estimate_betas
- # lrt_pvalues
diff --git a/docs/index.rst b/docs/index.rst
deleted file mode 100644
index 4610cec..0000000
--- a/docs/index.rst
+++ /dev/null
@@ -1,39 +0,0 @@
-.. CellRegMap documentation master file, created by
- sphinx-quickstart on Mon Jan 10 23:46:16 2022.
- You can adapt this file completely to your liking, but it should at least
- contain the root `toctree` directive.
-
-Welcome to CellRegMap's documentation!
-======================================
-
-What is CellRegMap
-------------------
-
-The cellular regulatory map (``cellregmap``) is a statistical framework to map context-specific effects of genetic variants on single-cell gene expression.
-It builds on a linear mixed model and is implemented in Python.
-
-What do you need to run CellRegMap
-----------------------------------
-
-Single-cell expression profiles assayed through scRNA-seq in the form of a count matrix (cells x genes)
-
-Cellular contexts: these can be known factors or a latent representation of the space (e.g., PCs by cells)
-
-Genotypes: CellRegMap tests for individual eQTL effects between a genetic variant and a gene's expression, so genotypes need to be provided for variants of interest.
-
-A kinship matrix accounting for relatedness among samples.
-
-.. toctree::
- :maxdepth: 2
- :caption: Contents:
-
- install
- functions
-
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/docs/install.rst b/docs/install.rst
deleted file mode 100644
index cb55400..0000000
--- a/docs/install.rst
+++ /dev/null
@@ -1,26 +0,0 @@
-******************
-Install CellRegMap
-******************
-
-Stable version
---------------
-
-``cellregmap`` can be installed `from PyPI `_ with ``pip``:
-::
- python3 -m pip install cellregmap
-
-
-Development version
--------------------
-
-To install ``cellregmap`` in development mode, install it from the `GitHub repository `_:
-::
- git clone https://github.com/limix/CellRegMap.git
- cd CellRegMap
- python3 -m pip install -e
-
-
-Docker installation
--------------------
-
-If you use Docker, you can also pull the `pre-build image from dockerhub `_
diff --git a/docs/make.bat b/docs/make.bat
deleted file mode 100644
index 153be5e..0000000
--- a/docs/make.bat
+++ /dev/null
@@ -1,35 +0,0 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
- set SPHINXBUILD=sphinx-build
-)
-set SOURCEDIR=.
-set BUILDDIR=_build
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
- echo.
- echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
- echo.installed, then set the SPHINXBUILD environment variable to point
- echo.to the full path of the 'sphinx-build' executable. Alternatively you
- echo.may add the Sphinx directory to PATH.
- echo.
- echo.If you don't have Sphinx installed, grab it from
- echo.https://www.sphinx-doc.org/
- exit /b 1
-)
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-
-:end
-popd
diff --git a/docs/requirements.txt b/docs/requirements.txt
deleted file mode 100644
index 3bcf96c..0000000
--- a/docs/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-sphinx>=2.1.2
-# cellregmap
-git+https://github.com/limix/CellRegMap.git
diff --git a/images/Cuomo_Anna.png b/images/Cuomo_Anna.png
new file mode 100755
index 0000000..d431490
Binary files /dev/null and b/images/Cuomo_Anna.png differ
diff --git a/images/Horta_Danilo.jpeg b/images/Horta_Danilo.jpeg
new file mode 100644
index 0000000..99dbfa3
Binary files /dev/null and b/images/Horta_Danilo.jpeg differ
diff --git a/images/Tobis_photo.jpeg b/images/Tobis_photo.jpeg
new file mode 100644
index 0000000..0ed2fab
Binary files /dev/null and b/images/Tobis_photo.jpeg differ
diff --git a/index.md b/index.md
new file mode 100644
index 0000000..25c0e6d
--- /dev/null
+++ b/index.md
@@ -0,0 +1,14 @@
+---
+layout: default
+title: CellRegMap
+---
+
+# CellRegMap
+
+The Cellular Regulatory Map (CellRegMap) is a linear mixed model approach to test for and characterize context-specific eQTL variants across several cell contexts and states.
+CellRegMap leverages single cell RNA sequencing (scRNA-seq) data, and does not require discretization of cells into cell groups.
+CellRegMap builds on the previously proposed [StructLMM](https://www.nature.com/articles/s41588-018-0271-0) but importantly can account for repeated observations for the same samples, _e.g._, multiple cells for the same donor, and for population stratification and relatedness.
+
+
+
+For more details on the CellRegMap model and its applications to both real and simulated data, see our [paper](https://www.embopress.org/doi/full/10.15252/msb.202110663).
diff --git a/input_files.md b/input_files.md
new file mode 100644
index 0000000..fcc0076
--- /dev/null
+++ b/input_files.md
@@ -0,0 +1,162 @@
+---
+layout: default
+title: "Input Files"
+mathjax: true
+---
+
+# The CellRegMap model
+
+The CellRegMap model can be cast as:
+
+$y = W\alpha + g\beta_G + g \odot \beta_{GxC} + c + u + \epsilon$,
+
+where
+
+$\beta_{GxC} \sim \mathcal{N} (0, \sigma^2_{GxC}CC^T)$,
+
+$c \sim \mathcal{N} (0, \sigma^2_{C}CC^T)$,
+
+$u \sim \mathcal{N} (0, \sigma^2_{KC}(CC^T \odot K))$, and
+
+$\epsilon \sim \mathcal{N} (0, \sigma^2_n I)$
+
+
+
+
+## Brief description of the model terms
+
+
+The following terms should be provided as input files:
+
+* **Phenotype vector ($y$)** - in the linear mixed model, this is the outcome variable.
+In eQTL mapping, this represents expression level of a given gene of interest, across samples.
+The main application of CellRegMap is using scRNA-seq data, in which case this will be a column vector, with length corresponding to the number of cells considered.
+For optimal fit with the model (which assumes a Gaussian distribution) we recommend [quantile normalising](https://github.com/limix/limix/blob/master/limix/qc/_quant_gauss.py) this vector, or at least [standardising](https://github.com/limix/limix/blob/master/limix/qc/_mean_std.py) it.
+
+* **Genotype vector ($g$)** - SNP vector.
+This represents the genotype of each sample at the genomic locus of interest, and is typically modelled as 0, 1 or 2, representing the number of minor alleles (however, the model can also handle a continuous vector of dosages).
+Note that a genotype file is well defined at the level of donors, and needs to be appropriately [expanded](https://github.com/annacuomo/CellRegMap_analyses/blob/main/endodiff/preprocessing/Expand_genotypes_kinship.ipynb) across cells.
+It is also possible to input a matrix $G$ whose columns represent multiple SNPs ($g$'s) to be tested for that gene (see "Notes" below).
+
+* **Cellular context matrix ($C$)** - cellular environment/context matrix.
+Rows are cells, columns are values across the different cellular contexts.
+Columns of C can for example be principal components, or other latent factor representations of the data (e.g., using MOFA [1], ZINB-WaVE [2] or LDVAE [3]), binary vector encoding assignment to different cellular groups such as cell types, or any other factor, including environmental exposures, or disease state.
+Best practice is to column-standardise this matrix.
+
+* **Kinship matrix ($K$)**, or its decomposition ($hK$, such that $K = hK @ hK^T$) - a sample covariance matrix, often the so-called [kinship](https://www.cog-genomics.org/plink/1.9/distance) matrix (or genetic relationship matrix; GRM), appropriately [expanded](https://github.com/annacuomo/CellRegMap_analyses/blob/main/endodiff/preprocessing/Expand_genotypes_kinship.ipynb) across cells.
+
+
+
+
+* **Covariate matrix ($W$)** - any additional fixed effect terms to include in the model, such as sex or age.
+If no such terms are needed an intercept of ones should be provided.
+
+
+
+The following terms will be estimated by the model:
+
+* **SNP effect sizes**, both due to persistent effects ($\beta_G$) and to GxC interactions ($\beta_{GxC}$) can be estimated using the estimate_betas() function, see [usage page](https://limix.github.io/CellRegMap/usage.html).
+
+* **other inferred parameters** ($\alpha$, $\sigma^2$ values) are estimated by the model but not returned as values.
+
+# Notes
+
+## Necessary inputs
+The model will not run if one of **y, W, g** or **C** is not provided as input.
+
+* The following terms are absolutely necessary: expression phenotypes (**y**), genotypes (**g**), and cellular contexts (**C**).
+
+* A kinship matrix (**K**; or its decomposition **hK**, such that K = hK @ hK.T) is highly recommended, to appropriately account for sample structure, especially the repeatedness across cells from the same individual.
+ * if you do not have access to a GRM, consider providing a block diagonal sample covariance, with blocks corresponding to individuals.
+ * If K (or hK) is not provided, CellRegMap becomes equivalent to [StructLMM](https://limix.github.io/CellRegMap/structlmm.html).
+
+
+* If no covariates (W) are necessary, simply provide a vector of [ones](https://numpy.org/doc/stable/reference/generated/numpy.ones.html) as an intercept term.
+
+
+## Each SNP-gene pair should be tested independently
+The test is run independently for each gene-SNP pair, thus in the model above, **y** and **g** are one-dimensional vectors, representing i) the expression of a single gene and ii) the genotypes at a single SNP, respectively.
+
+* The implementation does allow for multiple SNPs to be tested for a given gene; this can be achieved by providing a matrix G in which each column is a different SNP, G=[g_1, .. g_n].
+In this case, the model **simply loops over each SNP and tests one at a time**, then returns a list of p-values, one per SNP.
+
+* On the other hand, each gene needs to be tested separately, as CellRegMap cannot take the full expression matrix as input.
+
+As tests are independent, we recommend parallelising as much as possible, for example submitting independent jobs for each chromosome, gene, or even gene-SNP pair.
+
+## Covariates, cell contexts and repeatedness are fixed
+W, C, hK (and thus K) remain the same across all tests (_i.e._, across all SNP-gene pairs).
+
+# Dimensionality
+
+Specified dimensionality for each of the terms, where n is the total number of cells:
+
+* **y**: n x 1 (only one gene tested at a time)
+* **W**: n x c, where c is the number of fixed effect covariates (_e.g._, age, sex..)
+* **C**: n x k, where k is the number of contexts to test for interactions
+* **G**: n x s, where s is the number of SNPs to be tested for a given gene
+* **hK**: n x p, where p is the number of individuals, decomposition of the n x n kinship matrix K
+
+
+
+
+# Normalization
+
+For optimal model fit, we recommend standardizing or quantile normalizing (to a standard normal distribution) the phenotype vector **y** and column-standardizing the cellular contexts **C**.
+Standardization refers to a transformation of a vector to have 0 mean and standard deviation 1. You can use [StandardScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html) for this task.
+Quantile normalization is a rank-normalization which enforces a standard normal distribution of the vector provided.
+For an implementation of quantile-normalization see [here](https://github.com/limix/limix/blob/master/limix/qc/_quant_gauss.py).
+
+
+
+# Pseudocells
+
+This approach refers to the action of grouping together small numbers of similar cells into "pseudocells" to reduce issues due to sparsity and speed up computations by reducing sample size.
+Existing implementations include [Metacell](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1812-2) and the [micro pooling approach](https://yoseflab.github.io/VISION/articles/micropooling.html) within the [Vision](https://www.nature.com/articles/s41467-019-12235-0) pipeline.
+Those approaches do not directly take into account the presence of several genetically distinct donors, which is important here.
+To address this, we recommend using one of these approaches for each donor separately.
+For an implementation of how we computed meta-cells in the CellRegMap manuscript (for the neuronal differentiation data analysis), see [here](https://github.com/annacuomo/CellRegMap_analyses/blob/main/neuroseq/preprocessing/create_metacells.py).
+
+
+# Multiple testing correction
+
+Since thousands of tests are typically run, [multiple testing correction](https://en.wikipedia.org/wiki/Multiple_comparisons_problem) of the test p-values is necessary.
+Below, we provide guidelines for how to correct for multiple testing for the two main tests implemented in CellRegMap.
+Also refer to workflow [here](https://github.com/annacuomo/CellRegMap_analyses/blob/main/endodiff/usage/README.md)
+
+## Association test
+
+Run the discovery analysis, then apply a two-step multiple testing correction: 1) within each gene, across SNPs (FWER), and 2) across genes (FDR).
+
+
+## Interaction test
+
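+Test only one SNP per gene, or at least only SNPs that are independent of each other.
+If only one SNP is tested per gene, go straight to step 2 (FDR across genes); if multiple but independent SNPs are tested, apply a Bonferroni correction within each gene as step 1, then proceed to step 2.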
+
+# References
+
+[1] Argelaguet\*, Velten\* et al., Molecular Systems Biology, 2018 (MOFA: multi-omics factor analysis) - [link](https://www.embopress.org/doi/full/10.15252/msb.20178124)
+
+[2] Risso et al, Nature Communications, 2018 (ZINB-WaVE: zero-inflated negative binomial-based Wanted Variation Extraction) - [link](https://www.nature.com/articles/s41467-017-02554-5)
+
+[3] Svensson et al, Bioinformatics, 2020 (LDVAE: linearly decoded variational autoencoder) - [link](https://academic.oup.com/bioinformatics/article/36/11/3418/5807606)
+
+
+
+
+
+
diff --git a/installation.md b/installation.md
new file mode 100644
index 0000000..17a8816
--- /dev/null
+++ b/installation.md
@@ -0,0 +1,43 @@
+---
+layout: default
+title: "Installation"
+---
+
+## Stable release (easiest)
+
+CellRegMap is implemented as a Python package.
+To install CellRegMap using the pip python installer, enter:
+
+ pip install cellregmap
+
+in your command line.
+
+## Development mode
+
+To use the latest features of CellRegMap you can install the latest version from GitHub by entering:
+
+ git clone https://github.com/limix/CellRegMap.git
+ cd CellRegMap
+ pip install -e .
+
+in your command line.
+
+## Installation using a Docker image
+If you use Docker, you can also pull the [pre-built image from Docker Hub](https://hub.docker.com/r/annasecuomo/cellregmap).
+
+
+
+
diff --git a/limix.md b/limix.md
new file mode 100644
index 0000000..121e198
--- /dev/null
+++ b/limix.md
@@ -0,0 +1,22 @@
+## Relation to LIMIX
+
+CellRegMap's linear mixed model (LMM) uses the FaST-LMM (Factored Spectrally Transformed Linear Mixed Model) implementation described [here](https://www.nature.com/articles/nmeth.1681) and used within the [LIMIX](https://github.com/limix/limix) framework.
+
+### Linear Mixed Model implementation using LIMIX
+LIMIX is described in [this preprint](https://www.biorxiv.org/content/10.1101/003905v2) and documentation can be found [here](https://limix-tempdoc.readthedocs.io/en/latest/).
+While there are several fast implementations to map eQTLs using correlation or linear regression-based approaches (e.g., [tensorQTL](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1836-7), [matrix eQTL](https://academic.oup.com/bioinformatics/article/28/10/1353/213326?login=true)), to our knowledge LIMIX is the fastest software out there for eQTL association analyses using **linear mixed models** (which allow better modelling of population stratification and cryptic relatedness, both of which are prevalent in human genetic data - see for example the thread [here](https://twitter.com/shaicarmi/status/1508298704796663808?s=21&t=6xaF5BmozHil3VbXotlGhQ)).
+
+### eQTL mapping using LIMIX
+Specifically, for a LIMIX wrapper to map eQTLs, see the [limix QTL pipeline](https://github.com/single-cell-genetics/limix_qtl) developed in our lab, which we most recently used in our [Genome Biology Publication](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02407-x).
+
+The pipeline provides an easy wrapper to map eQTLs using various methods, automatically re-ordering, subselecting and expanding files to match with each other.
+See the wiki pages for [installation](https://github.com/single-cell-genetics/limix_qtl/wiki/Installation) and [input files](https://github.com/single-cell-genetics/limix_qtl/wiki/Inputs), and an example [snakemake for standard eQTL mapping](https://github.com/single-cell-genetics/limix_qtl/wiki/QTL-mapping-on-small-chunks-using-snakemake).
+
+### Coming Soon
+With [Marc Jan Bonder](https://twitter.com/mjbonder), we are in the process of implementing CellRegMap runners compatible with this pipeline.
+
+Also see our sister project [scDALI](https://pmbio.github.io/scdali/), a model for modelling allelic imbalance in single cells.
+
+
diff --git a/proof.md b/proof.md
deleted file mode 100644
index 142ee3e..0000000
--- a/proof.md
+++ /dev/null
@@ -1,79 +0,0 @@
-K = GGt, Sigma = EEt
-M = K*Sigma
-
-Let & be the Hadamard product.
-
-We known that:
-
- rank(A&B) <= rank(A) * rank(B)
-
-Let
-
- K = U @ Dk @ Ut
- Sigma = V @ Ds @ Vt
-
-for diagonal matrices Dk and Ds.
-
-K & Sigma = K & (Sum_i vi * lambda_i * vi^T ) = Sum_i (K & (vi @ lambda_i @ vi^T))
- = Sum_i ( Lki @ Lki^T )
-
-Let e = |1 1 ... 1|. We have
-
- K & (vi @ lambda_i @ vi^T) = K & (ui @ ui^T)
- = K & (Diag(ui) @ e @ e^T @ Diag(ui))
- = Diag(ui) @ (K & (e @ e^T)) @ Diag(ui) (See (2.10), [1])
- = Diag(ui) @ K @ Diag(ui)
- = Diag(ui) @ G @ Gt @ Diag(ui)
- = Lki @ Lki^T
-
-for Lki = Diag(ui) @ G.
-
-Trial
------
-
-```python
- import numpy as np
-
- K = np.random.randn(3, 3)
- K = K @ K.T
-
- # rank-2 symmetric matrix
- u0 = np.random.randn(3, 1)
- u1 = np.random.randn(3, 1)
- Sigma = u0 @ u0.T + u1 @ u1.T
- sigma, U = np.linalg.eigh(Sigma)
-
- tmp = sigma[1] * U[:, [1]] @ U[:, [1]].T + sigma[2] * U[:, [2]] @ U[:, [2]].T
- # Close to zero
- print(tmp - Sigma)
-
- u1 = np.sqrt(sigma[1]) * U[:, [1]]
- u2 = np.sqrt(sigma[2]) * U[:, [2]]
-
- tmp = u1 @ u1.T + u2 @ u2.T - Sigma
- # Close to zero
- print(tmp)
-
- e = np.ones(3)
-
- G = np.linalg.cholesky(K)
-
- Lk1 = np.diag(u1.ravel()) @ G
- Lk2 = np.diag(u2.ravel()) @ G
-
- # Close to zero
- print(Lk1 @ Lk1.T + Lk2 @ Lk2.T - K * Sigma)
-```
-
-Goal
-----
-
-- tr(K & Sigma) (there is a property)
-- det(K & Sigma)
-- (K & Sigma)^{-1} @ x
-
-References
-----------
-
-[1] Hadamard Products and Multivariate Statistical Analysis.
- https://core.ac.uk/download/pdf/82272966.pdf
diff --git a/references/greater power and computational efficiency.pdf.bz2 b/references/greater power and computational efficiency.pdf.bz2
deleted file mode 100644
index c98aefa..0000000
Binary files a/references/greater power and computational efficiency.pdf.bz2 and /dev/null differ
diff --git a/references/rachel thesis part 1.pdf.bz2 b/references/rachel thesis part 1.pdf.bz2
deleted file mode 100644
index 3e97cc1..0000000
Binary files a/references/rachel thesis part 1.pdf.bz2 and /dev/null differ
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index ebfda63..0000000
--- a/setup.cfg
+++ /dev/null
@@ -1,69 +0,0 @@
-[metadata]
-author = Anna Cuomo, Danilo Horta
-author_email = acuomo@ebi.ac.uk
-classifiers =
- Development Status :: 5 - Production/Stable
- License :: OSI Approved :: MIT License
- Operating System :: OS Independent
- Programming Language :: Python
-description = A linear mixed model framework to map multivariate context-specific eQTL using single-cell RNA-seq data.
-download_url = https://github.com/limix/CellRegMap
-keywords = scRNA-seq, eQTL, context-specific regulation
-license = MIT
-long_description = file: README.md
-long_description_content_type = text/markdown
-maintainer = Danilo Horta
-platforms = Windows, MacOS, Linux
-maintainer_email = horta@ebi.ac.uk
-name = cellregmap
-url = https://github.com/limix/CellRegMap
-version = attr: version.get
-
-[options]
-zip_safe = True
-include_package_data = True
-packages = find:
-python_requires = >= 3.6
-install_requires =
- chiscore>=0.2.3
- glimix-core>=3.1.12
- numpy-sugar>=1.5.1
- numpy>=1.17.5
- pytest>=5.4.3
- scipy>=1.2.3
- tqdm>=4.41.1
-
-[aliases]
-test = pytest
-
-[tool:pytest]
-addopts =
- --doctest-modules
- --doctest-glob='*.rst'
- --ignore="setup.py"
-doctest_plus = enabled
-doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS ALLOW_UNICODE
-doctest_rst = enabled
-norecursedirs = .eggs .git *.egg-info build .ropeproject .undodir old_files
-
-[flake8]
-ignore = E501,E741,E203,W503,W0212,W0622,R0915,E743
-
-[pycodestyle]
-ignore = E741,E743,E203
-max-line-length = 88
-
-[tool:isort]
-multi_line_output=3
-include_trailing_comma=True
-force_grid_wrap=0
-combine_as_imports=True
-line_length=88
-
-[pylint]
-disable = redefined-builtin,R0915
-
-[rstcheck]
-ignore_substitutions = today, version
-ignore_directives = plot, autofunction, command-output, autmodule, automodule, autoclass, autoattribute, automethod, doctest
-ignore_messages = Error in "math" directive
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 2553605..0000000
--- a/setup.py
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python
-
-from setuptools import setup
-
-if __name__ == "__main__":
- setup()
diff --git a/structlmm.md b/structlmm.md
new file mode 100644
index 0000000..e3e07c6
--- /dev/null
+++ b/structlmm.md
@@ -0,0 +1,25 @@
+---
+layout: default
+title: "StructLMM"
+---
+
+## Relation to StructLMM model
+CellRegMap builds on and extends the structured linear mixed model (StructLMM), proposed in [Moore\*, Casale\* et al, 2018](https://www.nature.com/articles/s41588-018-0271-0), in the context of population genetics.
+StructLMM makes it possible to test for GxE effects across multiple environmental exposures at once, extending traditional interaction models, which can only consider one environment at a time.
+
+However, StructLMM is not designed to deal with repeated or related samples.
+Thus, it is not well suited to model longitudinal data (where multiple observations from the same individuals are collected over time) or single-cell data (where multiple cells are collected from the same individual), nor can it optimally model population stratification and cryptic relatedness, which have been shown to be prevalent in population genetic data.
+CellRegMap overcomes this by including an additional random effect term that models relatedness across samples.
+
+Nevertheless, the original StructLMM model can be run using CellRegMap, by simply setting the repeatedness term to None, i.e.:
+
+ hK=None
+
+and then running the model similarly to what is described in the [usage page](https://limix.github.io/CellRegMap/usage.html), i.e.:
+
+ from cellregmap import run_interaction
+
+ pv_slmm = run_interaction(y=y, W=W, E=E, G=g, hK=None)[0]
+ print(f'StructLMM interaction test p-value: {pv_slmm}')
+
+where we note that "E" is used here instead of "C" as typically this model will be applied in the context of population genetics to test for effects with environmental exposures, as opposed to the cellular contexts generally considered in applications of CellRegMap to scRNA-seq data.
diff --git a/tutorials.md b/tutorials.md
new file mode 100644
index 0000000..fdd32e5
--- /dev/null
+++ b/tutorials.md
@@ -0,0 +1,14 @@
+---
+layout: default
+title: "Tutorials"
+---
+
+For basic usage on toy data, see the [Usage page](https://limix.github.io/CellRegMap/usage.html)
+
+
+For applications of CellRegMap described in [our manuscript](https://www.embopress.org/doi/full/10.15252/msb.202110663), see
+* [Application on simulated data & Simulation strategy](https://github.com/annacuomo/CellRegMap_analyses/tree/main/simulations)
+* [Application to single-cell dataset of iPSCs differentiating towards endoderm](https://github.com/annacuomo/CellRegMap_analyses/tree/main/endodiff)
+* [Application to single-cell dataset of iPSCs differentiating towards dopaminergic neurons](https://github.com/annacuomo/CellRegMap_analyses/tree/main/neuroseq)
+
+More to come soon!
diff --git a/usage.md b/usage.md
new file mode 100644
index 0000000..9d2ba43
--- /dev/null
+++ b/usage.md
@@ -0,0 +1,104 @@
+---
+layout: default
+title: "Usage"
+---
+
+There are three main functions that can be run within the CellRegMap package (detailed in our [docs](https://cellregmap.readthedocs.io/)):
+
+* Association test
+* Interaction test
+* Estimation of effect sizes
+
+## Association test (persistent effects)
+
+The main functionality of CellRegMap is to investigate genotype-context (GxC) interactions and identify context-specific genetic effects on expression in cohort-scale single-cell data (see **Interaction test** below).
+However, to improve scalability, we recommend running the (computationally more intensive) interaction-test function only on a set of candidate eQTLs.
+In the [original CellRegMap paper](https://www.biorxiv.org/content/10.1101/2021.09.01.458524v1) we consider eQTLs previously identified in the original studies[1,2], but it is now also possible to test for persistent eQTL effects within the CellRegMap framework itself, using the association-test function.
+In this case, the model can be cast as:
+
+$y = W\alpha + g\beta_G + c + u + \epsilon$,
+
+which is similar to the main model except for the GxC term, which is missing. Here, we test for a persistent effect only, i.e., $\beta_G \neq 0$.
+
+CellRegMap function: _run_association()_
+
+## Interaction test (GxC effects)
+
+This is the main test implemented in CellRegMap, where we test for GxC effects across cellular states and individual SNP variants.
+In this case we consider the full model:
+
+$y = W\alpha + g\beta_G + g \odot \beta_{GxC} + c + u + \epsilon$
+
+and test for $\beta_{GxC} \neq 0$.
+While in principle any SNP-gene pair can be tested for GxC effects, we recommend running this test on a set of candidate eQTLs (either known a priori or identified using the **Association test** described above), or interesting (e.g., disease-linked) variants to improve statistical power.
+
+CellRegMap function: _run_interaction()_
+
+## Estimation of effect sizes
+
+Finally, CellRegMap can be used to estimate cell-level effect sizes driven by GxC effects for individual eQTLs ($\beta_{GxC}$), thus predicting the cells where those effects are detected.
+Generally, it makes sense to use this function to characterise eQTLs that show evidence of GxC effects, i.e., for which significant p-values were obtained when using the **Interaction test** above.
+The model is the same except for the term $c$, which is now modelled as fixed effects in order to estimate the GxC term itself.
+
+CellRegMap function: _estimate_betas()_
+
+For more details on the tests above and underlying assumptions we refer the reader to the Supplementary Methods available as part of the [paper's Appendix](https://www.embopress.org/action/downloadSupplement?doi=10.15252%2Fmsb.202110663&file=msb202110663-sup-0001-Appendix.pdf).
+
+## Simple usage example
+
+The model is implemented in [python](https://www.python.org).
+All vectors and matrices should be provided as [numpy arrays](https://numpy.org/doc/stable/reference/generated/numpy.array.html), and there should be no flat (one-dimensional) arrays.
+If the shape of a vector is (n,) please [reshape](https://numpy.org/doc/stable/reference/generated/numpy.reshape.html) to (n,1).
+
+Below, see a simple usage example with toy inputs:
+
+ from numpy import ones
+ from numpy.random import RandomState
+
+ from cellregmap import run_association, run_interaction, estimate_betas
+
+ random = RandomState(1)
+ n = 30 # number of samples (cells)
+ p = 5 # number of individuals
+ k = 4 # number of contexts
+ y = random.randn(n, 1) # outcome vector (expression phenotype, one gene only)
+ C = random.randn(n, k) # context matrix (cells by contexts/factors)
+ W = ones((n, 1)) # intercept (covariate matrix)
+ hK = random.randn(n, p) # decomposition of kinship matrix (K = hK @ hK.T)
+ g = 1.0 * (random.rand(n, 1) < 0.2) # SNP vector
+
+ ## Association test
+ pv0 = run_association(y=y, G=g, W=W, E=C, hK=hK)[0]
+ print(f'Association test p-value: {pv0}')
+
+ ## Interaction test
+ pv = run_interaction(y=y, G=g, W=W, E=C, hK=hK)[0]
+ print(f'Interaction test p-value: {pv}')
+
+ # Effect sizes estimation
+ betas = estimate_betas(y=y, G=g, W=W, E=C, hK=hK)
+ beta_G = betas[0] # persistent effect (scalar)
+ beta_GxC = betas[1][0] # GxC effects (vector)
+
+ print(f'persistent genetic effect (betaG): {beta_G}')
+ print(f'cell-level effect sizes due to GxC (betaGxC): {beta_GxC}')
+
+
+For more guidelines and instructions on how to construct input files from real data, please visit the [Input Files page](https://limix.github.io/CellRegMap/input_files.html).
+
+For a workflow description language ([WDL](https://openwdl.org/)) pipeline to run CellRegMap across thousands of genes check the pipeline's [Github page](https://github.com/populationgenomics/cellregmap-pipeline) and [Dockerhub repo](https://hub.docker.com/r/annasecuomo/cellregmap_pipeline).
+
+## References
+
+1. Cuomo\*, Seaton\*, McCarthy\* et al, Nature Communications, 2020 - [Single-cell RNA-sequencing of differentiating iPS cells reveals dynamic genetic effects on gene expression](https://www.nature.com/articles/s41467-020-14457-z)
+
+2. Jerber\*, Seaton\*, Cuomo\* et al, Nature Genetics, 2021 - [Population-scale single-cell RNA-seq profiling across dopaminergic neuron differentiation](https://www.nature.com/articles/s41588-021-00801-6)
+
+
+
+
+
diff --git a/version.py b/version.py
deleted file mode 100644
index a3bbb9e..0000000
--- a/version.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import re
-from os.path import join
-
-from setuptools import find_packages
-
-
-def get():
- pkgnames = find_packages()
- if len(pkgnames) == 0:
- return "unknown"
- pkgname = pkgnames[0]
- content = open(join(pkgname, "__init__.py")).read()
- c = re.compile(r"__version__ *= *('[^']+'|\"[^\"]+\")")
- m = c.search(content)
- if m is None:
- return "unknown"
- return m.groups()[0][1:-1]