diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml new file mode 100644 index 0000000..ade52e5 --- /dev/null +++ b/.github/workflows/deploy.yaml @@ -0,0 +1,49 @@ +name: Deploy CellRegMap + +on: + push: + tags: + - v* + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - name: Get tag name + run: | + echo ${GITHUB_REF##*/} + + # clone the repo to the local directory + - uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: "3.8" + + # In the future, run the tests here + # - name: Test with pytest + # run: | + # python -m unittest test/ + + # Deploy to pypi + - name: Build and publish to pypi + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + pip install wheel twine + python setup.py sdist bdist_wheel + twine upload dist/* + + - name: Login to Docker Hub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + # Build and deploy the docker + - name: Build and publish to docker + run: | + DOCKER_TAG=annasecuomo/cellregmap:${GITHUB_REF##*/} + docker build -t $DOCKER_TAG . 
+ docker push $DOCKER_TAG diff --git a/.gitignore b/.gitignore index 38ce427..ffbcc37 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +docs/_autosummary + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7eba10b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,5 @@ +FROM python:3.8.5 + +ADD cellregmap /app/cellregmap/cellregmap +ADD setup.py setup.cfg version.py proof.md LICENSE MANIFEST.in /app/cellregmap/ +RUN pip install /app/cellregmap diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..97fb9fb --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Anna Cuomo + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100755 index 0000000..762aa52 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include LICENSE +include README.md +include version.py diff --git a/README.md b/README.md index e69de29..1bf02e0 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,47 @@ +# CellRegMap + +Cellular Regulatory Map (CellRegMap) is a linear mixed model approach to perform multi-context eQTL mapping by leveraging single cell RNA sequencing (scRNA-seq) data. +It is related to the previously proposed [StructLMM](https://www.nature.com/articles/s41588-018-0271-0) but importantly it can account for sample structure, including population structure and repeated observations for the same samples, e.g., multiple cells for the same donor. + +The CellRegMap model and its applications to both real and simulated data are described in the [CellRegMap manuscript](https://www.embopress.org/doi/full/10.15252/msb.202110663). + +For more instructions and tutorials to facilitate usage of the package visit [our website](https://limix.github.io/CellRegMap/)! + +## Install + +To install the latest release from PyPI, enter + + python3 -m pip install cellregmap + +in your command line. + +## Development + +To install it in development mode, enter + + git clone https://github.com/limix/CellRegMap.git + cd CellRegMap + python3 -m pip install -e . + +in your command line. + +## Installation using a Docker image +If you use Docker, you can also pull the [pre-built image from dockerhub](https://hub.docker.com/r/annasecuomo/cellregmap). 
+ + + diff --git a/cellregmap/__init__.py b/cellregmap/__init__.py new file mode 100644 index 0000000..e8a5609 --- /dev/null +++ b/cellregmap/__init__.py @@ -0,0 +1,20 @@ +from ._cellregmap import CellRegMap, run_association, run_interaction, estimate_betas, run_association_fast +# from ._simulate import create_variances, sample_phenotype, sample_phenotype_gxe +from ._types import Term + +__version__ = "0.0.3" + +# these will be imported when adding ``from cellregmap import *`` +__all__ = [ + "__version__", + "CellRegMap", + "run_association", + "run_association_fast", + "run_interaction", + "estimate_betas", + # "lrt_pvalues", + # "sample_phenotype", + # "create_variances", + # "sample_phenotype_gxe", + "Term", +] diff --git a/cellregmap/_cellregmap.py b/cellregmap/_cellregmap.py new file mode 100644 index 0000000..2a81bd4 --- /dev/null +++ b/cellregmap/_cellregmap.py @@ -0,0 +1,682 @@ +from typing import Optional + +from glimix_core.lmm import LMM +from numpy import ( + asarray, + atleast_1d, + atleast_2d, + concatenate, + inf, + linspace, + ones, + sqrt, + stack, +) +from numpy.linalg import cholesky +from numpy_sugar import ddot +from numpy_sugar.linalg import economic_qs_linear, economic_svd +from tqdm import tqdm + +from ._math import PMat, QSCov, ScoreStatistic + + +class CellRegMap: + """ + Mixed-model with genetic effect heterogeneity. + + The CellRegMap model can be cast as: + + 𝐲 = W𝛂 + 𝐠𝛽₁ + 𝐠⊙𝛃₂ + 𝐞 + 𝐮 + 𝛆, (1) + + where: + + 𝛃₂ ~ 𝓝(𝟎, 𝓋₃𝙴₀𝙴₀ᵀ), + 𝐞 ~ 𝓝(𝟎, 𝓋₁ρ₁𝙴₁𝙴₁ᵀ), + 𝐮 ~ 𝓝(𝟎, 𝓋₁(1-ρ₁)𝙺⊙𝙴₂𝙴₂ᵀ), and + 𝛆 ~ 𝓝(𝟎, 𝓋₂𝙸). + + 𝐠⊙𝛃 is a random effect term which models the GxC effect. + Additionally, W𝛂 models additive covariates and 𝐠𝛽₁ models persistent genetic effects. + Both are modelled as fixed effects. + On the other hand, 𝐞, 𝐮 and 𝛆 are modelled as random effects + 𝐞 is the environment effect, 𝐮 is a background term accounting for interactions between population structure + and environmental structure, and 𝛆 is the iid noise. 
+ The full covariance of 𝐲 is therefore given by: + + cov(𝐲) = 𝓋₃𝙳𝙴₀𝙴₀ᵀ𝙳 + 𝓋₁ρ₁𝙴₁𝙴₁ᵀ + 𝓋₁(1-ρ₁)𝙺⊙𝙴₂𝙴₂ᵀ + 𝓋₂𝙸, + + where 𝙳 = diag(𝐠). Its marginalised form is given by: + + 𝐲 ~ 𝓝(W𝛂 + 𝐠𝛽₁, 𝓋₃𝙳𝙴₀𝙴₀ᵀ𝙳 + 𝓋₁(ρ₁𝙴₁𝙴₁ᵀ + (1-ρ₁)𝙺⊙𝙴₂𝙴₂ᵀ) + 𝓋₂𝙸). + + The CellRegMap method is used to perform an interaction test: + + The interaction test compares the following hypotheses (from Eq. 1): + + 𝓗₀: 𝓋₃ = 0 + 𝓗₁: 𝓋₃ > 0 + + 𝓗₀ denotes no GxE effects, while 𝓗₁ models the presence of GxE effects. + + """ + + def __init__(self, y, E, W=None, Ls=None, E1=None, hK=None): + self._y = asarray(y, float).flatten() + self._E0 = asarray(E, float) + Ls = [] if Ls is None else Ls + + if W is not None: + self._W = asarray(W, float) + else: + self._W = ones((self._y.shape[0], 1)) + + if E1 is not None: + self._E1 = asarray(E1, float) + else: + self._E1 = asarray(E, float) + + + self._Ls = list(asarray(L, float) for L in Ls) + + assert self._W.ndim == 2 + assert self._E0.ndim == 2 + assert self._E1.ndim == 2 + + assert self._y.shape[0] == self._W.shape[0] + assert self._y.shape[0] == self._E0.shape[0] + assert self._y.shape[0] == self._E1.shape[0] + + for L in Ls: + assert self._y.shape[0] == L.shape[0] + assert L.ndim == 2 + + self._null_lmm_assoc = {} + + self._halfSigma = {} + self._Sigma_qs = {} + # TODO: remove it after debugging + self._Sigma = {} + + # option to set different background (when Ls are defined, background is K*EEt + EEt) + if len(Ls) == 0: + # self._rho0 = [1.0] + if hK is None: # EEt only as background + self._rho1 = [1.0] + self._halfSigma[1.0] = self._E1 + self._Sigma_qs[1.0] = economic_qs_linear(self._E1, return_q1=False) + else: # hK is decomposition of K, background in this case is K + EEt + self._rho1 = linspace(0, 1, 11) + for rho1 in self._rho1: + a = sqrt(rho1) + b = sqrt(1 - rho1) + hS = concatenate([a * self._E1] + [b * hK], axis=1) + self._halfSigma[rho1] = hS + self._Sigma_qs[rho1] = economic_qs_linear( + self._halfSigma[rho1], return_q1=False + ) + else: + # 
self._rho0 = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] + self._rho1 = linspace(0, 1, 11) + for rho1 in self._rho1: + # Σ = ρ₁𝙴𝙴ᵀ + (1-ρ₁)𝙺⊙E + # concatenate((sqrt(rho1) * self._E, sqrt(1 - rho1) * G1), axis=1) + # self._Sigma[rho1] = rho1 * self._EE + (1 - rho1) * self._K + # self._Sigma_qs[rho1] = economic_qs(self._Sigma[rho1]) + a = sqrt(rho1) + b = sqrt(1 - rho1) + hS = concatenate([a * self._E1] + [b * L for L in Ls], axis=1) + self._halfSigma[rho1] = hS + self._Sigma_qs[rho1] = economic_qs_linear( + self._halfSigma[rho1], return_q1=False + ) + + @property + def n_samples(self): + return self._y.shape[0] + + def predict_interaction(self, G, MAF): + """ + Estimate effect sizes for a given set of SNPs + """ + # breakpoint() + G = asarray(G, float) + E0 = self._E0 + W = self._W + n_snps = G.shape[1] + beta_g_s = [] + beta_gxe_s = [] + + p = asarray(atleast_1d(MAF), float) + normalization = 1 / sqrt(2 * p * (1 - p)) + + for i in range(n_snps): + g = G[:, [i]] + # mean(𝐲) = W𝛂 + 𝐠𝛽₁ + 𝙴𝝲 = 𝙼𝛃 + M = concatenate((W, g, E0), axis=1) + gE = g * E0 + best = {"lml": -inf, "rho1": 0} + hSigma_p = {} + Sigma_qs = {} + for rho1 in self._rho1: + # Σ[ρ₁] = ρ₁(𝐠⊙𝙴)(𝐠⊙𝙴)ᵀ + (1-ρ₁)𝙺⊙EEᵀ + a = sqrt(rho1) + b = sqrt(1 - rho1) + hSigma_p[rho1] = concatenate( + [a * gE] + [b * L for L in self._Ls], axis=1 + ) + # ( + # (a * gE, b * self._G), axis=1 + # ) + # cov(𝐲) = 𝓋₁Σ[ρ₁] + 𝓋₂𝙸 + # lmm = Kron2Sum(Y, [[1]], M, hSigma_p[rho1], restricted=True) + Sigma_qs[rho1] = economic_qs_linear( + hSigma_p[rho1], return_q1=False + ) + lmm = LMM(self._y, M, Sigma_qs[rho1], restricted=True) + lmm.fit(verbose=False) + + if lmm.lml() > best["lml"]: + best["lml"] = lmm.lml() + best["rho1"] = rho1 + best["lmm"] = lmm + + # breakpoint() + lmm = best["lmm"] + # beta_g = 𝛽₁ + beta_g = lmm.beta[W.shape[1]] + # yadj = 𝐲 - 𝙼𝛃 + yadj = (self._y - lmm.mean()).reshape(self._y.shape[0], 1) + rho1 = best["rho1"] + v1 = lmm.v0 + v2 = lmm.v1 + hSigma_p_qs = economic_qs_linear(hSigma_p[rho1], 
return_q1=False) + qscov = QSCov(hSigma_p_qs[0][0], hSigma_p_qs[1], v1, v2) + # v = cov(𝐲)⁻¹(𝐲 - 𝙼𝛃) + v = qscov.solve(yadj) + + sigma2_gxe = v1 * rho1 + beta_gxe = sigma2_gxe * E0 @ (gE.T @ v) * normalization[i] + # beta_star = (beta_g * normalization + beta_gxe) + + beta_g_s.append(beta_g) + beta_gxe_s.append(beta_gxe) + + + return (asarray(beta_g_s), stack(beta_gxe_s).T) + + def estimate_aggregate_environment(self, g): + g = atleast_2d(g).reshape((g.size, 1)) + E0 = self._E0 + gE = g * E0 + W = self._W + M = concatenate((W, g, E0), axis=1) + best = {"lml": -inf, "rho1": 0} + hSigma_p = {} + for rho1 in self._rho1: + # Σₚ = ρ₁(𝐠⊙𝙴)(𝐠⊙𝙴)ᵀ + (1-ρ₁)𝙺⊙E + a = sqrt(rho1) + b = sqrt(1 - rho1) + hSigma_p[rho1] = concatenate([a * gE] + [b * L for L in self._Ls], axis=1) + # cov(𝐲) = 𝓋₁Σₚ + 𝓋₂𝙸 + # lmm = Kron2Sum(Y, [[1]], M, hSigma_p[rho1], restricted=True) + QS = self._Sigma_qs[rho1] + lmm = LMM(self._y, M, QS, restricted=True) + lmm.fit(verbose=False) + + if lmm.lml() > best["lml"]: + best["lml"] = lmm.lml() + best["rho1"] = rho1 + best["lmm"] = lmm + + lmm = best["lmm"] + yadj = self._y - lmm.mean() + # rho1 = best["rho1"] + v1 = lmm.v0 + v2 = lmm.v1 + rho1 = best["rho1"] + sigma2_gxe = rho1 * v1 + hSigma_p_qs = economic_qs_linear(hSigma_p[rho1], return_q1=False) + qscov = QSCov(hSigma_p_qs[0][0], hSigma_p_qs[1], v1, v2) + # v = cov(𝐲)⁻¹yadj + v = qscov.solve(yadj) + beta_gxe = sigma2_gxe * gE.T @ v + + return E0 @ beta_gxe + + def scan_association(self, G): + info = {"rho1": [], "e2": [], "g2": [], "eps2": []} + + # NULL model + best = {"lml": -inf, "rho1": 0} + for rho1 in self._rho1: + QS = self._Sigma_qs[rho1] + # LRT for fixed effects requires ML rather than REML estimation + lmm = LMM(self._y, self._W, QS, restricted=False) + lmm.fit(verbose=False) + + if lmm.lml() > best["lml"]: + best["lml"] = lmm.lml() + best["rho1"] = rho1 + best["lmm"] = lmm + + null_lmm = best["lmm"] + info["rho1"].append(best["rho1"]) + info["e2"].append(null_lmm.v0 * best["rho1"]) + 
info["g2"].append(null_lmm.v0 * (1 - best["rho1"])) + info["eps2"].append(null_lmm.v1) + + n_snps = G.shape[1] + alt_lmls = [] + for i in tqdm(range(n_snps)): + g = G[:, [i]] + X = concatenate((self._W, g), axis=1) + QS = self._Sigma_qs[best["rho1"]] + alt_lmm = LMM(self._y, X, QS, restricted=False) + alt_lmm.fit(verbose=False) + alt_lmls.append(alt_lmm.lml()) + + pvalues = lrt_pvalues(null_lmm.lml(), alt_lmls, dof=1) + + info = {key: asarray(v, float) for key, v in info.items()} + return asarray(pvalues, float), info + + + def scan_association_fast(self, G): + info = {"rho1": [], "e2": [], "g2": [], "eps2": []} + + # NULL model + best = {"lml": -inf, "rho1": 0} + for rho1 in self._rho1: + QS = self._Sigma_qs[rho1] + # LRT for fixed effects requires ML rather than REML estimation + lmm = LMM(self._y, self._W, QS, restricted=False) + lmm.fit(verbose=False) + + if lmm.lml() > best["lml"]: + best["lml"] = lmm.lml() + best["rho1"] = rho1 + best["lmm"] = lmm + + null_lmm = best["lmm"] + info["rho1"].append(best["rho1"]) + info["e2"].append(null_lmm.v0 * best["rho1"]) + info["g2"].append(null_lmm.v0 * (1 - best["rho1"])) + info["eps2"].append(null_lmm.v1) + + # Alternative model + lmm = null_lmm + flmm = lmm.get_fast_scanner() + alt_lmls = flmm.fast_scan(G, verbose=False)['lml'] + + pvalues = lrt_pvalues(null_lmm.lml(), alt_lmls, dof=1) + + info = {key: asarray(v, float) for key, v in info.items()} + return asarray(pvalues, float), info + + + def scan_interaction( + self, G, idx_E: Optional[any] = None, idx_G: Optional[any] = None + ): + """ + 𝐲 = W𝛂 + 𝐠𝛽₁ + 𝐠⊙𝛃₂ + 𝐞 + 𝐮 + 𝛆 + [fixed=X] [H1] + + 𝛃₂ ~ 𝓝(𝟎, 𝓋₃𝙴₀𝙴₀ᵀ), + 𝐞 ~ 𝓝(𝟎, 𝓋₁ρ₁𝙴₁𝙴₁ᵀ), + 𝐮 ~ 𝓝(𝟎, 𝓋₁(1-ρ₁)𝙺⊙𝙴₂𝙴₂ᵀ), and + 𝛆 ~ 𝓝(𝟎, 𝓋₂𝙸). 
+ + 𝓗₀: 𝓋₃ = 0 + 𝓗₁: 𝓋₃ > 0 + """ + # TODO: make sure G is nxp + from chiscore import davies_pvalue + + G = asarray(G, float) + n_snps = G.shape[1] + pvalues = [] + info = {"rho1": [], "e2": [], "g2": [], "eps2": []} + + for i in tqdm(range(n_snps)): + g = G[:, [i]] + X = concatenate((self._W, g), axis=1) + best = {"lml": -inf, "rho1": 0} + # Null model fitting: find best (𝛂, 𝛽₁, 𝓋₁, 𝓋₂, ρ₁) + for rho1 in self._rho1: + # QS = self._Sigma_qs[rho1] + # halfSigma = self._halfSigma[rho1] + # Σ = ρ₁𝙴𝙴ᵀ + (1-ρ₁)𝙺⊙E + # cov(y₀) = 𝓋₁Σ + 𝓋₂I + QS = self._Sigma_qs[rho1] + lmm = LMM(self._y, X, QS, restricted=True) + lmm.fit(verbose=False) + + if lmm.lml() > best["lml"]: + best["lml"] = lmm.lml() + best["rho1"] = rho1 + best["lmm"] = lmm + + lmm = best["lmm"] + # H1 via score test + # Let K₀ = e²𝙴𝙴ᵀ + g²𝙺⊙E + 𝜀²I + # e²=𝓋₁ρ₁ + # g²=𝓋₁(1-ρ₁) + # 𝜀²=𝓋₂ + # with optimal values 𝓋₁ and 𝓋₂ found above. + info["rho1"].append(best["rho1"]) + info["e2"].append(lmm.v0 * best["rho1"]) + info["g2"].append(lmm.v0 * (1 - best["rho1"])) + info["eps2"].append(lmm.v1) + # QS = economic_decomp( Σ(ρ₁) ) + Q0 = self._Sigma_qs[best["rho1"]][0][0] + S0 = self._Sigma_qs[best["rho1"]][1] + # e2 = best["lmm"].v0 * best["rho1"] + # g2 = best["lmm"].v0 * (1 - best["rho1"]) + # eps2 = best["lmm"].v1 + # EE = self._E @ self._E.T + # K = self._G @ self._G.T + # K0 = e2 * EE + g2 * K + eps2 * eye(K.shape[0]) + qscov = QSCov( + Q0, + S0, + lmm.v0, # 𝓋₁ + lmm.v1, # 𝓋₂ + ) + # start = time() + # qscov = QSCov(self._Sigma_qs[best["rho1"]], lmm.C0[0, 0], lmm.C1[0, 0]) + # print(f"Elapsed: {time() - start}") + # X = concatenate((self._E, g), axis=1) + # X = concatenate((self._W, g), axis=1) + + # Let P₀ = K₀⁻¹ - K₀⁻¹X(XᵀK₀⁻¹X)⁻¹XᵀK₀⁻¹. + P = PMat(qscov, X) + # P0 = inv(K0) - inv(K0) @ X @ inv(X.T @ inv(K0) @ X) @ X.T @ inv(K0) + + # P₀𝐲 = K₀⁻¹𝐲 - K₀⁻¹X(XᵀK₀⁻¹X)⁻¹XᵀK₀⁻¹𝐲. 
+ + # Useful for permutation + if idx_E is None: + E0 = self._E0 + else: + E0 = self._E0[idx_E, :] + + # The covariance matrix of H1 is K = K₀ + 𝓋₃diag(𝐠)⋅𝙴𝙴ᵀ⋅diag(𝐠) + # We have ∂K/∂𝓋₃ = diag(𝐠)⋅𝙴𝙴ᵀ⋅diag(𝐠) + # The score test statistics is given by + # Q = ½𝐲ᵀP₀⋅∂K⋅P₀𝐲 + # start = time() + + # Useful for permutation + if idx_G is None: + gtest = g.ravel() + else: + gtest = g.ravel()[idx_G] + + ss = ScoreStatistic(P, qscov, ddot(gtest, E0)) + Q = ss.statistic(self._y) + # import numpy as np + + # deltaK = np.diag(gtest) @ EE @ np.diag(gtest) + # Q_ = 0.5 * self._y.T @ P0 @ deltaK @ P0 @ self._y + # print(f"Elapsed: {time() - start}") + # Q is the score statistic for our interaction test and follows a linear + # combination + # of chi-squared (df=1) distributions: + # Q ∼ ∑λχ², where λᵢ are the non-zero eigenvalues of ½√P₀⋅∂K⋅√P₀. + # Since eigenvals(𝙰𝙰ᵀ) = eigenvals(𝙰ᵀ𝙰) (TODO: find citation), + # we can compute ½(√∂K)P₀(√∂K) instead. + # start = time() + # import scipy as sp + # sqrtm = sp.linalg.sqrtm + # np.linalg.eigvalsh(0.5 * sqrtm(P0) @ deltaK @ sqrtm(P0)) + # np.linalg.eigvalsh(0.5 * sqrtm(deltaK) @ P0 @ sqrtm(deltaK)) + # TODO: compare with Liu approximation, maybe try a computational intensive + # method + pval, pinfo = davies_pvalue(Q, ss.matrix_for_dist_weights(), True) + pvalues.append(pval) + # print(f"Elapsed: {time() - start}") + + info = {key: asarray(v, float) for key, v in info.items()} + return asarray(pvalues, float), info + + +def lrt_pvalues(null_lml, alt_lmls, dof=1): + """ + Compute p-values from likelihood ratios. + + These are likelihood ratio test p-values. + + Parameters + ---------- + null_lml : float + Log of the marginal likelihood under the null hypothesis. + alt_lmls : array_like + Log of the marginal likelihoods under the alternative hypotheses. + dof : int + Degrees of freedom. + + Returns + ------- + pvalues : ndarray + P-values. 
+ """ + from numpy import clip + from numpy_sugar import epsilon + from scipy.stats import chi2 + + lrs = clip(-2 * null_lml + 2 * asarray(alt_lmls, float), epsilon.super_tiny, inf) + pv = chi2(df=dof).sf(lrs) + return clip(pv, epsilon.super_tiny, 1 - epsilon.tiny) + +def run_association(y, W, E, G, hK=None): + """ + Association test. + + Test for persistent genetic effects. + + Compute p-values using a likelihood ratio test. + + Parameters + ---------- + y : array + Phenotype + W : array + Fixed effect covariates + E : array + Cellular contexts + G : array + Genotypes (expanded) + hK : array + decompositon of kinship matrix (expanded) + + Returns + ------- + pvalues : ndarray + P-values. + """ + if hK is None: hK = None + crm = CellRegMap(y, W, E, hK=hK) + pv = crm.scan_association(G) + return pv + +def run_association_fast(y, W, E, G, hK=None): + """ + Association test. + + Test for persistent genetic effects. + + Compute p-values using a likelihood ratio test. + + Parameters + ---------- + y : array + Phenotype + W : array + Fixed effect covariates + E : array + Cellular contexts + G : array + Genotypes (expanded) + hK : array + decompositon of kinship matrix (expanded) + + Returns + ------- + pvalues : ndarray + P-values. + """ + if hK is None: hK = None + crm = CellRegMap(y, W, E, hK=hK) + pv = crm.scan_association_fast(G) + return pv + +def get_L_values(hK, E): + """ + As the definition of Ls is not particulatly intuitive, + function to extract list of L values given kinship K and + cellular environments E + """ + # get eigendecomposition of EEt + [U, S, _] = economic_svd(E) + us = U * S + + # get decomposition of K \odot EEt + Ls = [ddot(us[:,i], hK) for i in range(us.shape[1])] + return Ls + +def run_interaction(y, E, G, W=None, E1=None, E2=None, hK=None, idx_G=None): + """ + Interaction test. + + Test for cell-level genetic effects due to GxC interactions. + + Compute p-values using a score test. 
+ + Parameters + ---------- + y : array + Phenotype + E : array + Cellular contexts (GxC component) + G : array + Genotypes (expanded) + W : array + Fixed effect covariates + hK : array + decompositon of kinship matrix (expanded) + E1 : array + Cellular contexts (C component) + E2 : array + Cellular contexts (K*C component) + idx_G : array + Permuted genotype index + + Returns + ------- + pvalues : ndarray + P-values. + """ + if E1 is None: E1 = E + else: E1 = E1 + if E2 is None: E2 = E + else: E2 = E2 + if hK is None: Ls = None + else: Ls = get_L_values(hK, E2) + crm = CellRegMap(y=y, E=E, W=W, E1=E1, Ls=Ls) + pv = crm.scan_interaction(G, idx_G) + return pv + +def compute_maf(X): + r"""Compute minor allele frequencies. + It assumes that ``X`` encodes 0, 1, and 2 representing the number + of alleles (or dosage), or ``NaN`` to represent missing values. + Parameters + ---------- + X : array_like + Genotype matrix. + Returns + ------- + array_like + Minor allele frequencies. + Examples + -------- + .. 
doctest:: + >>> from numpy.random import RandomState + >>> from limix.qc import compute_maf + >>> + >>> random = RandomState(0) + >>> X = random.randint(0, 3, size=(100, 10)) + >>> + >>> print(compute_maf(X)) # doctest: +FLOAT_CMP + [0.49 0.49 0.445 0.495 0.5 0.45 0.48 0.48 0.47 0.435] + """ + import dask.array as da + import xarray as xr + from pandas import DataFrame + from numpy import isnan, logical_not, minimum, nansum + if isinstance(X, da.Array): + s0 = da.nansum(X, axis=0).compute() + denom = 2 * (X.shape[0] - da.isnan(X).sum(axis=0)).compute() + elif isinstance(X, DataFrame): + s0 = X.sum(axis=0, skipna=True) + denom = 2 * logical_not(X.isna()).sum(axis=0) + elif isinstance(X, xr.DataArray): + if "sample" in X.dims: + kwargs = {"dim": "sample"} + else: + kwargs = {"axis": 0} + s0 = X.sum(skipna=True, **kwargs) + denom = 2 * logical_not(isnan(X)).sum(**kwargs) + else: + s0 = nansum(X, axis=0) + denom = 2 * logical_not(isnan(X)).sum(axis=0) + s0 = s0 / denom + s1 = 1 - s0 + maf = minimum(s0, s1) + if hasattr(maf, "name"): + maf.name = "maf" + return maf + +def estimate_betas(y, W, E, G, maf=None, E1=None, E2=None, hK=None): + """ + Effect sizes estimator + + Estimates cell-level genetic effects due to GxC + as well as persistent genetic effects across all cells. + + Parameters + ---------- + y : array + Phenotype + W : array + Fixed effect covariates + E : array + Cellular contexts + G : array + Genotypes (expanded) + maf: array + Minor allele frequencies (MAFs) for the SNPs in G + hK : array + decompositon of kinship matrix (expanded) + E1 : array + Cellular contexts (C component) + E2 : array + Cellular contexts (K*C component) + + Returns + ------- + betas : ndarray + estimated effect sizes, both persistent and due to GxC. 
+ """ + if E1 is None: E1 = E + else: E1 = E1 + if E2 is None: E2 = E + else: E2 = E2 + if hK is None: Ls = None + else: Ls = get_L_values(hK, E2) + crm = CellRegMap(y=y, E=E, W=W, E1=E1, Ls=Ls) + if maf is None: + maf = compute_maf(G) + # print("MAFs: {}".format(maf)) + betas = crm.predict_interaction(G, maf) + return betas diff --git a/cellregmap/_math.py b/cellregmap/_math.py new file mode 100644 index 0000000..fc56572 --- /dev/null +++ b/cellregmap/_math.py @@ -0,0 +1,256 @@ +""" +Mathematical definitions +------------------------ + +This module defines some core mathematical concepts which CellRegMap depends on. +The implementations provided here are meant to help understand those concepts and to provide test cases. + +We assume the definition + + 𝐲 ∼ 𝓝(𝚆, 𝙺) + +throughout this module. + +References +---------- +.. [1] Lippert, C., Xiang, J., Horta, D., Widmer, C., Kadie, C., Heckerman, D., + & Listgarten, J. (2014). Greater power and computational efficiency for + kernel-based association testing of sets of genetic variants. Bioinformatics, + 30(22), 3206-3214. +.. [2] Liu, H., Tang, Y., & Zhang, H. H. (2009). A new chi-square approximation + to the distribution of non-negative definite quadratic forms in non-central + normal variables. Computational Statistics & Data Analysis, 53(4), 853-856. +.. [3] Lee, Seunggeun, Michael C. Wu, and Xihong Lin. "Optimal tests for rare + variant effects in sequencing association studies." Biostatistics 13.4 (2012): + 762-775. +""" +from numpy import finfo, logical_not, sqrt +from numpy.linalg import eigh, eigvalsh, inv, lstsq, solve, svd +from numpy_sugar import ddot +from scipy.linalg import sqrtm + + +def rsolve(a, b): + """ + Robust solver. + """ + return lstsq(a, b, rcond=None)[0] + + +class QSCov: + """ + Represents 𝑎𝙺 + 𝑏𝙸. + + 𝙺 matrix is defined by its eigen decomposition, `QS`. 
+ """ + + def __init__(self, Q0, S0, a=1.0, b=1.0): + self._Q0 = Q0 + self._S0 = S0 + self._a = a + self._b = b + + def dot(self, v): + left = self._a * self._Q0 @ ddot(self._S0, self._Q0.T @ v, left=True) + right = self._b * v + return left + right + + def solve(self, v): + # nrows = self._Q0.shape[0] + # rank = self._Q0.shape[1] + + # tmp = ones(nrows) + # tmp[:rank] += (self._a / self._b) * self._S0 + # R = 1 / tmp + + # R0 = R[:rank] + # R1 = R[rank:] # Always ones + + # tmp = (self._a / self._b) * self._S0 + R0 = 1 / (1 + ((self._a / self._b) * self._S0)) + # R0 = R[:rank] + Q0v = self._Q0.T @ v + return (self._Q0 @ ddot(R0, Q0v, left=True) + v - self._Q0 @ Q0v) / self._b + # left = self._Q0 @ ddot(R0, self._Q0.T @ v, left=True) + # right = self._Q1 @ ddot(R1, self._Q1.T @ v, left=True) + # return (left + right) / self._b + + +class PMat: + """ + Represents 𝙿 = 𝙺⁻¹ - 𝙺⁻¹𝚆(𝚆ᵀ𝙺⁻¹𝚆)⁻¹𝚆ᵀ𝙺⁻¹. + + The 𝙺 is defined via an `QSCov` object. + """ + + def __init__(self, qscov: QSCov, W): + self._qscov = qscov + self._W = W + self._KiW = self._qscov.solve(self._W) + + def dot(self, v): + Kiv = self._qscov.solve(v) + return Kiv - self._KiW @ rsolve(self._W.T @ self._KiW, self._KiW.T @ v) + + +def P_matrix(W, K): + """ Computes 𝙿 = 𝙺⁻¹ - 𝙺⁻¹𝚆(𝚆ᵀ𝙺⁻¹𝚆)⁻¹𝚆ᵀ𝙺⁻¹. """ + KiW = solve(K, W) + return inv(K) - KiW @ solve(W.T @ KiW, KiW.T) + + +class ScoreStatistic: + """ + Score-test statistic [1]_ is given by + + 𝑄 = ½𝐲ᵀ𝙿(∂𝙺)𝙿𝐲. + """ + + def __init__(self, P: PMat, K: QSCov, sqrt_dK): + self._P = P + self._K = K + self._sqrt_dK = sqrt_dK + + def statistic(self, y): + """ Compute 𝑄 = ½𝐲ᵀ𝙿(∂𝙺)𝙿𝐲. """ + Py = self._P.dot(y) + return Py.T @ self._sqrt_dK @ self._sqrt_dK.T @ Py / 2 + + def matrix_for_dist_weights(self): + """Compute ½(√∂𝙺)𝙿(√∂𝙺). + + The returned matrix has its eigenvalues equal to the eigenvalues of ½√𝙿(∂𝙺)√𝙿. 
+ """ + return self._sqrt_dK.T @ self._P.dot(self._sqrt_dK) / 2 + + def distr_weights(self): + weights = eigvalsh(self.matrix_for_dist_weights()) + return weights[weights > 1e-16] + + +def score_statistic(y, W, K, dK): + """ + Score-test statistic [1]_ is given by + + 𝑄 = ½𝐲ᵀ𝙿(∂𝙺)𝙿𝐲. + """ + P = P_matrix(W, K) + return y.T @ P @ dK @ P @ y / 2 + + +def score_statistic_qs(y, W, qscov, dK): + """ + Same as `score_statistic`. + """ + P = PMat(qscov, W) + Py = P.dot(y) + return Py.T @ dK @ Py / 2 + + +def score_statistic_distr_weights(W, K, dK): + """ + Score-test statistic follows a weighted sum of random variables [1]_: + + 𝑄 ∼ ∑ᵢ𝜆ᵢχ²(1), + + where 𝜆ᵢ are the non-zero eigenvalues of ½√𝙿(∂𝙺)√𝙿. + """ + P = P_matrix(W, K) + weights = eigvalsh(sqrtm(P) @ dK @ sqrtm(P)) / 2 + return weights[weights > 1e-16] + + +def score_statistic_liu_params(q, weights): + """ + Computes Pr(𝑄 > q) for 𝑄 ∼ ∑ᵢ𝜆ᵢχ²(1) using a modification [3]_ of the original Liu + survival function approximation [2]_. This function also returns estimated + parameters, not yet fully explained here. + """ + from chiscore import liu_sf + + n = len(weights) + # We use the Liu survival function to approximate the distribution followed by a + # linear combination of noncentral chi-squared variables (Q) using only three + # parameters of such distribution: the weights, degrees of freedom, and + # noncentrality (Qh). 𝑄 ∼ ∑λᵢχ²(hᵢ, 𝛿ᵢ), + # where λᵢ, hᵢ, and 𝛿ᵢ are the weights, degrees of freedom (1), and noncentral + # (0) parameters. By setting the last input to True we use the modified version + # [REF]. 
+ (pv, dof_x, _, info) = liu_sf(q, weights, [1] * n, [0] * n, True) + return {"pv": pv, "mu_q": info["mu_q"], "sigma_q": info["sigma_q"], "dof_x": dof_x} + + +def qmin(liu_params): + import scipy.stats as st + from numpy import zeros + + n = len(liu_params) + + # T statistic + T = min(i["pv"] for i in liu_params) + + qmin = zeros(n) + percentile = 1 - T + for i in range(n): + q = st.chi2.ppf(percentile, liu_params[i]["dof_x"]) + mu_q = liu_params[i]["mu_q"] + sigma_q = liu_params[i]["sigma_q"] + dof = liu_params[i]["dof_x"] + qmin[i] = (q - dof) / (2 * dof) ** 0.5 * sigma_q + mu_q + + return qmin + + +def economic_qs(K, epsilon=sqrt(finfo(float).eps)): + r"""Economic eigen decomposition for symmetric matrices. + + A symmetric matrix ``K`` can be decomposed in + :math:`\mathrm Q_0 \mathrm S_0 \mathrm Q_0^\intercal + \mathrm Q_1\ + \mathrm S_1 \mathrm Q_1^ \intercal`, where :math:`\mathrm S_1` is a zero + matrix with size determined by ``K``'s rank deficiency. + + Args: + K (array_like): Symmetric matrix. + epsilon (float): Eigen value threshold. Default is + ``sqrt(finfo(float).eps)``. + + Returns: + tuple: ``((Q0, Q1), S0)``. + """ + + (S, Q) = eigh(K) + + nok = abs(max(Q[0].min(), Q[0].max(), key=abs)) < epsilon + nok = nok and abs(max(K.min(), K.max(), key=abs)) >= epsilon + if nok: + from scipy.linalg import eigh as sp_eigh + + (S, Q) = sp_eigh(K) + + ok = S >= epsilon + nok = logical_not(ok) + S0 = S[ok] + Q0 = Q[:, ok] + Q1 = Q[:, nok] + return ((Q0, Q1), S0) + + +def economic_qs_linear(G): + r"""Economic eigen decomposition for symmetric matrices ``dot(G, G.T)``. + + It is theoretically equivalent to ``economic_qs(dot(G, G.T))``. + Refer to :func:`numpy_sugar.economic_qs` for further information. + + Args: + G (array_like): Matrix. + + Returns: + tuple: ``((Q0, Q1), S0)``. 
+ """ + if G.shape[0] > G.shape[1]: + (Q0, Ssq, _) = svd(G, full_matrices=False) + S0 = Ssq ** 2 + return (Q0, S0) + + Q, S0 = economic_qs(G.dot(G.T)) + return (Q[0], S0) diff --git a/cellregmap/_simulate.py b/cellregmap/_simulate.py new file mode 100644 index 0000000..0665a49 --- /dev/null +++ b/cellregmap/_simulate.py @@ -0,0 +1,479 @@ +from collections import namedtuple +from typing import List, Union + +from numpy import ( + array_split, + asarray, + cumsum, + errstate, + eye, + isscalar, + ones, + repeat, + split, + sqrt, + stack, + zeros, +) +from numpy.random import Generator +from numpy_sugar import ddot +from numpy_sugar.linalg import economic_svd + +from ._types import Term + +Variances = namedtuple("Variances", "g gxe k e n") +Simulation = namedtuple( + "Simulation", "mafs y offset beta_g y_g y_gxe y_k y_e y_n variances G E Lk Ls K M" +) +SimulationFixedGxE = namedtuple( + "Simulation", + "mafs y offset beta_g beta_gxe beta_e y_g y_gxe y_k y_e y_n variances G E Lk K M", +) + + +def sample_maf(n_snps: int, maf_min: float, maf_max: float, random: Generator): + assert maf_min <= maf_max and maf_min >= 0 and maf_max <= 1 + return random.random(n_snps) * (maf_max - maf_min) + maf_min + + +def sample_genotype(n_samples: int, mafs, random): + G = [] + mafs = asarray(mafs, float) + for maf in mafs: + probs = [(1 - maf) ** 2, 1 - ((1 - maf) ** 2 + maf ** 2), maf ** 2] + g = random.choice([0, 1, 2], p=probs, size=n_samples) + G.append(asarray(g, float)) + + return stack(G, axis=1) + + +def column_normalize(X): + X = asarray(X, float) + + with errstate(divide="raise", invalid="raise"): + return (X - X.mean(0)) / X.std(0) + + +def create_environment_matrix( + n_samples: int, n_env: int, groups: List[List[int]], random: Generator +): + E = random.normal(size=[n_samples, n_env]) + E = column_normalize(E) + EE = E @ E.T + EE /= EE.diagonal().mean() + H = sample_covariance_matrix(n_samples, groups)[1] + M = EE + H + M /= M.diagonal().mean() + jitter(M) + return 
_symmetric_decomp(M) + + +def create_environment_vector( + n_samples: int, groups: List[List[int]], random: Generator +): + E = zeros((n_samples, 1)) + + values = random.choice([-1, 1], 2, False) + for value, group in zip(values, groups): + E[group, 0] = value + + return E + + +def sample_covariance_matrix(n_samples: int, groups: List[List[int]]): + X = zeros((n_samples, len(groups))) + + for i, idx in enumerate(groups): + X[idx, i] = 1.0 + + K = X @ X.T + K /= K.diagonal().mean() + jitter(K) + + return (_symmetric_decomp(K), K) + + +def jitter(K): + with errstate(divide="raise", invalid="raise"): + # This small diagonal offset is to guarantee the full-rankness. + K += 1e-8 * eye(K.shape[0]) + + return K + + +def create_variances(r0, v0, has_kinship=True) -> Variances: + """ + Remember that: + + cov(𝐲) = 𝓋₀(1-ρ₀)𝙳𝟏𝟏ᵀ𝙳 + 𝓋₀ρ₀𝙳𝙴𝙴ᵀ𝙳 + 𝓋₁ρ₁EEᵀ + 𝓋₁(1-ρ₁)𝙺 + 𝓋₂𝙸. + + Let us define: + + σ²_g = 𝓋₀(1-ρ₀) (variance explained by persistent genetic effects) + σ²_gxe = 𝓋₀ρ₀ (variance explained by GxE effects) + + σ²_e = 𝓋₁ρ₁ (variance explained by environmental effects) + σ²_k = 𝓋₁(1-ρ₁) (variance explained by population structure) + σ²_n = 𝓋₂ (residual variance, noise) + + We set the total variance to sum up to 1: + + 1 = σ²_g + σ²_gxe + σ²_e + σ²_k + σ²_n + + We set the variances explained by the non-genetic terms to be equal: + + v = σ²_e = σ²_k = σ²_n + + For `has_kinship=False`, we instead set the variances such that: + + v = σ²_e = σ²_n + + Parameters + ---------- + r0 : float + This is ρ₀. + v0 : float + This is 𝓋₀. 
+ """ + v_g = v0 * (1 - r0) + v_gxe = v0 * r0 + + v_k = 0.0 + if has_kinship: + v = (1 - v_gxe - v_g) / 3 + v_e = v + v_k = v + v_n = v + else: + v = (1 - v_gxe - v_g) / 2 + v_e = v + v_n = v + + variances = {"g": v_g, "gxe": v_gxe, "e": v_e, "n": v_n} + if has_kinship: + variances["k"] = v_k + else: + variances["k"] = None + + return Variances(**variances) + + +def sample_persistent_effsizes( + n_effects: int, causal_indices: list, variance: float, random: Generator +): + """ + Let 𝚓 denote a sample index and 𝚔 denote a SNP index. Let 𝚟ⱼ = 𝐠ⱼᵀ𝛃. + We assume that 𝑔ⱼₖ is a random variable such that: + + 𝔼[𝑔ⱼₖ] = 0 + 𝔼[𝑔ⱼₖ²] = 1 + + And we also assume that SNPs are uncorrelated from each other: 𝔼[𝑔ⱼₖ⋅𝑔ⱼᵣ] = 0 + for 𝚔≠𝚛. + Assuming that 𝛃 is given (fixed), we want to simulate 𝛃 such that: + + 𝔼[𝚟ⱼ] = 𝔼[∑ₖ𝑔ⱼₖ𝛽ₖ] = ∑ₖ𝔼[𝑔ⱼₖ]𝛽ₖ = 0 + 𝔼[𝚟ⱼ²] = 𝔼[(∑ₖ𝑔ⱼₖ𝛽ₖ)²] = ∑ₖ𝔼[𝑔ⱼₖ²]𝛽ₖ² = ∑ₖ𝛽ₖ² = 𝓋. + + Let 𝚒 denote a causal index. We initialize 𝛃←𝟎 and then randomly set 𝛽ᵢϵ{-1,+1} for + the causal SNPs. At last, we set 𝛃←𝛃×√(𝓋/𝘯) where 𝘯 is the number of causal SNPs. + This way we have ∑ₖ𝛽ₖ² = 𝓋. + + Parameters + ---------- + n_effects : int + Number of effects. + causal_indices : list + List of causal SNPs. + variance : float + Correspond to 𝓋. + """ + n_causals = len(causal_indices) + + effsizes = zeros(n_effects) + if variance == 0.0: + return effsizes + + effsizes[causal_indices] = random.choice([+1, -1], size=len(causal_indices)) + with errstate(divide="raise", invalid="raise"): + effsizes *= sqrt(variance / n_causals) + + return effsizes + + +def sample_persistent_effects(X, effsizes, variance: float): + y_g = X @ effsizes + if variance > 0: + _ensure_moments(y_g, 0, variance) + return y_g + + +def sample_gxe_effects(G, E, causal_indices: list, variance: float, random: Generator): + """ + Let 𝚒 denote a SNP index and 𝚓 denote an environment. + Let 𝑦₂ = ∑ᵢ(𝑔ᵢ⋅𝛜ᵀ𝜶ᵢ) be the total GxE effect with + + 𝜶ᵢ ∼ 𝓝(𝟎, 𝜎ᵢ²I) + + for every SNP ᵢ. 
+ We have + + 𝔼[𝑦₂] = ∑ᵢ𝔼[𝑔ᵢ⋅𝛜ᵀ𝜶ᵢ] = ∑ᵢ𝔼[𝑔ᵢ]𝔼[𝛜ᵀ𝜶ᵢ] = ∑ᵢ0⋅𝔼[𝛜ᵀ𝜶ᵢ] = 0, + + where 𝑔ᵢ and 𝛜ᵀ𝜶ᵢ are assumed to be uncorrelated. + + We also have + + 𝔼[𝑦₂²] = 𝔼[(∑ᵢ𝑔ᵢ⋅𝛜ᵀ𝜶ᵢ)²] = ∑ᵢ∑ⱼ𝔼[𝜖ⱼ²]𝔼[𝛼ᵢⱼ²] = ∑ᵢ𝜎ᵢ² = 𝜎², + + after a couple of assumptions. + + We define 𝜎ᵢ²=𝑣ᵢ if 𝑔ᵢ is causal and 𝜎ᵢ²=0 otherwise. We assume all causal SNPs + to have equal effect as defined by 𝑣ᵢ=𝜎²/𝑛₂, where 𝑛₂ is the number of SNPs + having GxE effects. + + We also assume that 𝔼[𝜖ⱼ]=0 and 𝔼[𝜖ⱼ²]=1/𝑛ₑ for every environment 𝚓. + """ + n_samples = G.shape[0] + n_envs = E.shape[1] + n_causals = len(causal_indices) + + y2 = zeros(n_samples) + if variance == 0.0: + return y2 + + vi = variance / n_causals + for causal in causal_indices: + # 𝜶ᵢ ∼ 𝓝(𝟎, 𝜎ᵢ²I) + alpha = sqrt(vi) * random.normal(size=n_envs) + + # Make the sample statistics close to population + # statistics + if n_envs > 1: + _ensure_moments(alpha, 0, sqrt(vi)) + + # 𝜷 = 𝛜ᵀ𝜶ᵢ + beta = E @ alpha + + # 𝑔ᵢ⋅𝛜ᵀ𝜶ᵢ + y2 += G[:, causal] * beta + + _ensure_moments(y2, 0, variance) + + return y2 + + +# def sample_environment_effects(E, variance: float, random): +# from numpy import sqrt + +# n_envs = E.shape[1] +# effsizes = sqrt(variance) * random.randn(n_envs) +# y3 = E @ effsizes + +# _ensure_moments(y3, 0, variance) + +# return y3 + + +# def sample_random_effect(K, variance: float, random: Generator): +# y = random.multivariate_normal(zeros(K.shape[0]), K, method="cholesky") + +# _ensure_moments(y, 0, variance) + +# return y + +def _sample_random_effect(X, variance: float, random: Generator): + u = sqrt(variance) * random.normal(size=X.shape[1]) + y = X @ u + + _ensure_moments(y, 0, variance) + + return y + + +def sample_random_effect(X, variance: float, random: Generator): + if not isinstance(X, tuple): + return _sample_random_effect(X, variance, random) + + n = X[0].shape[0] + y = zeros(n) + for L in X: + u = sqrt(variance) * random.normal(size=L.shape[1]) + y += L @ u + _ensure_moments(y, 0, variance) + + return y + + +def 
sample_noise_effects(n_samples: int, variance: float, random: Generator): + y5 = sqrt(variance) * random.normal(size=n_samples) + _ensure_moments(y5, 0, variance) + + return y5 + + +def sample_phenotype_gxe( + offset: float, + n_individuals: int, + n_snps: int, + n_cells: Union[int, List[int]], + n_env_groups: int, + maf_min: float, + maf_max: float, + g_causals: list, + gxe_causals: list, + variances: Variances, + random: Generator, + env_term: Term = Term.RANDOM, +) -> Simulation: + """ + Parameters + ---------- + n_cells + Integer number of array of integers. + """ + mafs = sample_maf(n_snps, maf_min, maf_max, random) + + G = sample_genotype(n_individuals, mafs, random) + G = repeat(G, n_cells, axis=0) + G = column_normalize(G) + + n_samples = G.shape[0] + + if isscalar(n_cells): + individual_groups = array_split(range(n_samples), n_individuals) + else: + individual_groups = split(range(n_samples), cumsum(n_cells))[:-1] + + env_groups = array_split(random.permutation(range(n_samples)), n_env_groups) + + E = sample_covariance_matrix(n_samples, env_groups)[0] + + Lk, K = sample_covariance_matrix(n_samples, individual_groups) + [U, S, _] = economic_svd(E) + us = U * S + Ls = tuple([ddot(us[:, i], Lk) for i in range(us.shape[1])]) + + beta_g = sample_persistent_effsizes(n_snps, g_causals, variances.g, random) + y_g = sample_persistent_effects(G, beta_g, variances.g) + + y_gxe = sample_gxe_effects(G, E, gxe_causals, variances.gxe, random) + + y_k = sample_random_effect(Ls, variances.k, random) + + if env_term is Term.RANDOM: + y_e = sample_random_effect(E, variances.e, random) + elif env_term is Term.FIXED: + n = E.shape[1] + beta_e = sample_persistent_effsizes(n, list(range(n)), variances.e, random) + y_e = sample_persistent_effects(E, beta_e, variances.e) + else: + raise ValueError("Invalid term.") + + y_n = sample_noise_effects(n_samples, variances.n, random) + + M = ones((K.shape[0], 1)) + y = offset + y_g + y_gxe + y_k + y_e + y_n + + simulation = Simulation( + 
mafs=mafs, + offset=offset, + beta_g=beta_g, + y_g=y_g, + y_gxe=y_gxe, + y_k=y_k, + y_e=y_e, + y_n=y_n, + y=y, + variances=variances, + Lk=Lk, + Ls=Ls, + K=K, + E=E, + G=G, + M=M, + ) + + return simulation + + +def sample_phenotype( + offset: float, + n_individuals: int, + n_snps: int, + n_cells: Union[int, List[int]], + n_env: int, + n_env_groups: int, + maf_min: float, + maf_max: float, + g_causals: list, + gxe_causals: list, + variances: Variances, + random: Generator, +) -> Simulation: + """ + Parameters + ---------- + n_cells + Integer number of array of integers. + """ + mafs = sample_maf(n_snps, maf_min, maf_max, random) + + G = sample_genotype(n_individuals, mafs, random) + G = repeat(G, n_cells, axis=0) + G = column_normalize(G) + + n_samples = G.shape[0] + individual_groups = array_split(range(n_samples), n_individuals) + + env_groups = array_split(random.permutation(range(n_samples)), n_env_groups) + E = create_environment_matrix(n_samples, n_env, env_groups, random) + + Lk, K = sample_covariance_matrix(n_samples, individual_groups) + + beta_g = sample_persistent_effsizes(n_snps, g_causals, variances.g, random) + + y_g = sample_persistent_effects(G, beta_g, variances.g) + + y_gxe = sample_gxe_effects(G, E, gxe_causals, variances.gxe, random) + + y_k = sample_random_effect(Lk, variances.k, random) + + y_e = sample_random_effect(E, variances.e, random) + + y_n = sample_noise_effects(n_samples, variances.n, random) + + M = ones((K.shape[0], 1)) + y = offset + y_g + y_gxe + y_k + y_e + y_n + + simulation = Simulation( + mafs=mafs, + offset=offset, + beta_g=beta_g, + y_g=y_g, + y_gxe=y_gxe, + y_k=y_k, + y_e=y_e, + y_n=y_n, + y=y, + variances=variances, + Lk=Lk, + K=K, + E=E, + G=G, + M=M, + ) + + return simulation + + +def _ensure_moments(arr, mean: float, variance: float): + arr -= arr.mean(0) + mean + with errstate(divide="raise", invalid="raise"): + arr /= arr.std(0) + arr *= sqrt(variance) + + +def _symmetric_decomp(H): + [U, S, _] = economic_svd(H) + 
return ddot(U, sqrt(S)) diff --git a/cellregmap/_types.py b/cellregmap/_types.py new file mode 100644 index 0000000..895923a --- /dev/null +++ b/cellregmap/_types.py @@ -0,0 +1,8 @@ +from enum import Enum + +__all__ = ["Term"] + + +class Term(Enum): + FIXED = 1 + RANDOM = 2 diff --git a/struct_lmm2/test/__init__.py b/cellregmap/test/__init__.py similarity index 100% rename from struct_lmm2/test/__init__.py rename to cellregmap/test/__init__.py diff --git a/cellregmap/test/old_test_struct_lmm2.py b/cellregmap/test/old_test_struct_lmm2.py new file mode 100644 index 0000000..24c690d --- /dev/null +++ b/cellregmap/test/old_test_struct_lmm2.py @@ -0,0 +1,99 @@ +import pytest +import scipy.stats as stats +from numpy import array, asarray, eye, hstack, ones, sqrt, stack, tile, zeros +from numpy.random import RandomState +from numpy.testing import assert_allclose + +from struct_lmm2 import StructLMM2 +from struct_lmm2._simulate import ( + column_normalize, + create_environment_matrix, + sample_covariance_matrix, + sample_genotype, + sample_maf, + sample_persistent_effsizes, + create_variances, +) + + +@pytest.fixture +def data(): + seed = 0 + random = RandomState(seed) + n_samples = 100 + maf_min = 0.05 + maf_max = 0.45 + n_snps = 20 + # SNPs with persistent effect + g_snps = [5, 6] + # SNPs with interaction effect + gxe_snps = [10, 11] + # Contribution of interactions (proportion) + r0 = 0.8 + v0 = 0.4 + + # Simulate two environments + E = create_environment_matrix(n_samples) + + mafs = sample_maf(n_snps, maf_min, maf_max, random) + G = sample_genotype(n_samples, mafs, random) + # We normalize the columns of G so that we have 𝔼[𝐠ᵀ𝐠] = 1. 
+ G = column_normalize(G) + + K = sample_covariance_matrix(n_samples, random) + v = create_variances(r0, v0) + beta_g = sample_effect_sizes(n_snps, g_snps, v["v_g"], random) + beta_gxe = sample_effect_sizes(n_snps, gxe_snps, v["v_gxe"], random) + y_g = G @ beta_g + + for i in range(n_snps): + # beta_gxe = random.multivariate_normal(zeros(n_samples), sigma_gxe[i] * Sigma) + beta_gxe = sigma_gxe[i] * u_gxe + y_gxe += G[:, i] * beta_gxe + + # return {"y": y, "W": W, "E": E, "K": K, "G": G} + + +@pytest.mark.skip("Not working yet.") +def test_structlmm_int_2kinships(): + random = RandomState(0) + n = 100 + c = 2 + y = random.randn(n) + M = ones((n, 1)) + W = random.randn(n, c) + E = random.randn(n, 4) + + p_values1 = [] + p_values2 = [] + + for i in range(10): + random = RandomState(i) + g = random.randn(n) + lmm2 = StructLMM2(y, W, E) + lmm2.fit_null(g) + _p = lmm2.score_2_dof(g) + p_values2.append(_p) + + y = y.reshape(y.shape[0], 1) + lmm = StructLMM(y, M, E) + g = g.reshape(g.shape[0], 1) + covs1 = hstack((W, g)) + lmm.fit(covs1) + _p = lmm.score_2dof_inter(g) + p_values1.append(_p) + + print(stats.pearsonr(p_values2, p_values1)[0]) + assert_allclose(p_values2, p_values1, rtol=1e-4) + + +@pytest.mark.skip("Not working yet.") +def test_structlmm2_interaction(data): + y = data["y"] + W = data["W"] + E = data["E"] + K = data["K"] + G = data["G"] + lmm2 = StructLMM2(y, W, E, K) + print(lmm2.scan_interaction(G)) + pass diff --git a/cellregmap/test/test_cellregmap.py b/cellregmap/test/test_cellregmap.py new file mode 100644 index 0000000..e69de29 diff --git a/cellregmap/test/test_fixed_gxe.py b/cellregmap/test/test_fixed_gxe.py new file mode 100644 index 0000000..4327212 --- /dev/null +++ b/cellregmap/test/test_fixed_gxe.py @@ -0,0 +1,137 @@ +import numpy as np +from glimix_core.lmm import LMM +from numpy import arange, asarray, clip, concatenate, inf, median, newaxis +from numpy.random import RandomState, default_rng +from numpy_sugar import epsilon +from 
numpy_sugar.linalg import economic_qs, economic_qs_linear +from scipy.stats import chi2 + +from cellregmap import create_variances, sample_phenotype_fixed_gxe + + +def test_fixed_gxe(): + random = default_rng(20) + + n_individuals = 100 + # n_individuals = 200 + # n_individuals = 500 + # n_individuals = 250 + maf_min = 0.20 + # maf_min = 0.05 + maf_max = 0.45 + # n_snps = 250 + n_snps = 100 + # n_snps = 50 + # n_snps = 20 + # n_cells = 100 + # n_cells = 10 + # n_cells = 2 + n_cells = arange(n_individuals) + 1 + # n_cells = 1 + n_env_groups = 2 + offset = 0.3 + r0 = 0.5 + v0 = 0.5 + g_causals = [5, 6] + gxe_causals = [10, 11] + + v = create_variances(r0, v0) + + s = sample_phenotype_fixed_gxe( + offset=offset, + n_individuals=n_individuals, + n_snps=n_snps, + n_cells=n_cells, + n_env_groups=n_env_groups, + maf_min=maf_min, + maf_max=maf_max, + g_causals=g_causals, + gxe_causals=gxe_causals, + variances=v, + random=random, + ) + + QS = economic_qs_linear(s.Lk) + # QS = economic_qs(s.K) + + # Test 1: + # H0: s.y_e + s.y_k + s.y_n + # H1: s.y_g + s.y_e + s.y_k + s.y_n + y = s.offset + s.y_g + 0*s.y_gxe + s.y_e + s.y_k + s.y_n + M = concatenate([s.M, s.E], axis=1) + lmm = LMM(y, M, QS, restricted=False) + lmm.fit(verbose=False) + scanner = lmm.get_fast_scanner() + data1 = scanner.fast_scan(s.G) + # Asserting to make sure that DoF==1 + assert data1["effsizes1"].ndim == 1 + dof = 1 + lml0 = scanner.null_lml() + lml1 = data1["lml"] + pv = test1_pvalues = lrt_pvalues(lml0, lml1, dof) + breakpoint() + print(pv[5], pv[6], pv[10], pv[11], median(pv[12:])) + # arange : 4.782253229179153e-192 1.0433583537506347e-196 0.01091492319670193 0.0010927629879084017 4.8750903985107926e-05 + # 2 n_cells : 1.090975503699979e-08 4.899993877637627e-08 0.009198608906693148 0.4204345922908054 0.5335573850563654 + # 10 n_cells : 3.982864661957504e-08 1.9070517075766003e-08 0.09201571609949227 0.35721972899471366 0.543538454382785 + # 100 n_cells: 4.469523142654005e-08 3.0916502150797485e-09 
0.07348502556111537 0.37843373021086246 0.5058335803047611 + + # Test 2: + # H0: s.y_g + s.y_e + s.y_k + s.y_n + # H1: s.y_gxe + s.y_g + s.y_e + s.y_k + s.y_n + y = s.offset + s.y_gxe + s.y_g + s.y_e + s.y_k + s.y_n + assert min(abs(y - s.y)) < 1e-10 + data2 = {"lml0": [], "lml1": []} + dof = 1 + for g in s.G.T: + g = g[:, newaxis] + M = concatenate([s.M, g, s.E], axis=1) + lmm = LMM(y, M, QS, restricted=False) + lmm.fit(verbose=False) + scanner = lmm.get_fast_scanner() + d = scanner.fast_scan(s.E * g) + # Asserting to make sure that DoF==1 + assert d["effsizes1"].ndim == 1 + lml0 = scanner.null_lml() + lml1 = d["lml"] + data2["lml0"].append(lml0) + data2["lml1"].append(lml1) + + lml0 = asarray(data2["lml0"]) + lml1 = concatenate(data2["lml1"]) + pv = test2_pvalues = lrt_pvalues(lml0, lml1, dof) + breakpoint() + print(pv[5], pv[6], pv[10], pv[11], median(pv[12:])) + # arange : 0.6084591769169483 3.6950357207986164e-10 3.457029241798351e-227 1.2283428436867437e-223 0.005415575098636543 + # 2 n_cells : 0.07359659661565397 0.4350647685877501 2.3937927457201096e-07 1.9917895864290987e-16 0.43616571604445775 + # 10 n_cells : 0.3210415979502985 0.007255621562641298 1.109487704555233e-32 1.0877864951200334e-44 0.17000683356223117 + # 100 n_cells: 0.25813564725186755 8.150568151907782e-22 2.2250738585072014e-308 2.2250738585072014e-308 0.00013995763429074527 + pass + +def compute_pvalues(): + stats["pv20"] = lrt_pvalues(stats["lml0"], stats["lml2"], stats["dof20"]) + +def lrt_pvalues(null_lml, alt_lmls, dof=1): + """ + Compute p-values from likelihood ratios. + + These are likelihood ratio test p-values. + + Parameters + ---------- + null_lml : float + Log of the marginal likelihood under the null hypothesis. + alt_lmls : array_like + Log of the marginal likelihoods under the alternative hypotheses. + dof : int + Degrees of freedom. + + Returns + ------- + pvalues : ndarray + P-values. 
+ """ + lrs = clip(-2 * null_lml + 2 * asarray(alt_lmls, float), epsilon.super_tiny, inf) + pv = chi2(df=dof).sf(lrs) + return clip(pv, epsilon.super_tiny, 1 - epsilon.tiny) + diff --git a/cellregmap/test/test_math.py b/cellregmap/test/test_math.py new file mode 100644 index 0000000..75135de --- /dev/null +++ b/cellregmap/test/test_math.py @@ -0,0 +1,91 @@ +import pytest +from celregmap._math import ( + P_matrix, + QSCov, + qmin, + rsolve, + score_statistic, + score_statistic_distr_weights, + score_statistic_liu_params, +) +from numpy import array, eye +from numpy.random import RandomState +from numpy.testing import assert_allclose +from numpy_sugar.linalg import economic_qs + + +@pytest.fixture +def data(): + random = RandomState(0) + n_samples = 3 + n_covariates = 2 + + W = random.randn(n_samples, n_covariates) + + K0 = random.randn(n_samples, n_samples) + K0 = K0 @ K0.T + + v = 0.2 + K = v * K0 + eye(n_samples) + dK = K0 + + alpha = array([0.5, -0.2]) + y = random.multivariate_normal(W @ alpha, K) + + return {"y": y, "W": W, "K": K, "dK": dK} + + +def test_QSCov(data): + rank = 2 + K = data["K"][:, :rank] @ data["K"][:, :rank].T + n_samples = K.shape[0] + QS = economic_qs(K) + a = 0.2 + b = 0.3 + + finalK = a * K + b * eye(n_samples) + + qscov = QSCov(QS[0][0], QS[1], a, b) + v = array([0.3, -0.2, 0.19]) + + assert_allclose(finalK @ v, qscov.dot(v)) + assert_allclose(rsolve(finalK, v), qscov.solve(v)) + + +def test_P_matrix(data): + P = array( + [ + [0.50355613, -0.24203676, -0.34880245], + [-0.24203676, 0.11633617, 0.16765363], + [-0.34880245, 0.16765363, 0.24160792], + ] + ) + assert_allclose(P_matrix(data["W"], data["K"]), P) + + +def test_score_statistic(data): + q = score_statistic(data["y"], data["W"], data["K"], data["dK"]) + assert_allclose(q, 0.49961017073389324) + + +def test_score_statistic_distr_weights(data): + weights = score_statistic_distr_weights(data["W"], data["K"], data["dK"]) + assert_allclose(weights, array([4.55266277e-09, 
3.46249449e-01]), atol=1e-7) + + +def test_score_statistic_liu_params(data): + q = score_statistic(data["y"], data["W"], data["K"], data["dK"]) + weights = score_statistic_distr_weights(data["W"], data["K"], data["dK"]) + params = score_statistic_liu_params(q, weights) + assert_allclose(params["pv"], 0.22966744652848403) + assert_allclose(params["mu_q"], 0.34624945394475326) + assert_allclose(params["sigma_q"], 0.48967066729451103) + assert_allclose(params["dof_x"], 1.0) + + +def test_qmin(data): + params = [] + params += [{"pv": 0.22966742, "mu_q": 0.34945, "sigma_q": 0.48670, "dof_x": 1.5}] + params += [{"pv": 0.65, "mu_q": 0.695, "sigma_q": 0.1, "dof_x": 0.7}] + expected = [0.5506645025120773, 0.7157125486956082] + assert_allclose(qmin(params), expected) diff --git a/cellregmap/test/test_simulation.py b/cellregmap/test/test_simulation.py new file mode 100644 index 0000000..5c54d07 --- /dev/null +++ b/cellregmap/test/test_simulation.py @@ -0,0 +1,217 @@ +import pytest +from numpy import logical_and, ones, tile, zeros +from numpy.linalg import matrix_rank +from numpy.random import RandomState +from numpy.testing import assert_, assert_allclose, assert_equal + +from cellregmap._simulate import ( + column_normalize, + create_environment_matrix, + create_variances, + sample_covariance_matrix, + sample_genotype, + sample_gxe_effects, + sample_maf, + sample_noise_effects, + sample_persistent_effsizes, + sample_phenotype, +) + + +def test_sample_maf(): + random = RandomState(0) + n_snps = 30 + maf_min = 0.2 + maf_max = 0.3 + mafs = sample_maf(n_snps, maf_min, maf_max, random) + assert_(all(logical_and(maf_min <= mafs, mafs <= maf_max))) + assert_(len(mafs) == n_snps) + + +def test_sample_genotype(): + random = RandomState(0) + n_samples = 3 + n_snps = 30 + maf_min = 0.2 + maf_max = 0.3 + mafs = sample_maf(n_snps, maf_min, maf_max, random) + G = sample_genotype(n_samples, mafs, random) + assert_(G.shape == (n_samples, n_snps)) + + A = set(list(G.ravel())) + B = set([0.0, 
1.0, 2.0]) + assert_(A - B == set()) + + +def test_column_normalize(): + random = RandomState(0) + n_samples = 10 + n_snps = 30 + maf_min = 0.2 + maf_max = 0.3 + mafs = sample_maf(n_snps, maf_min, maf_max, random) + G = sample_genotype(n_samples, mafs, random) + G = column_normalize(G) + assert_allclose(G.mean(0), zeros(n_snps), atol=1e-7) + assert_allclose(G.std(0), ones(n_snps)) + + +def test_sample_covariance_matrix(): + random = RandomState(0) + n_samples = 5 + K = sample_covariance_matrix(n_samples, random) + assert_(K.shape == (n_samples, n_samples)) + assert_allclose(K.diagonal().mean(), 1.0) + assert_(matrix_rank(K) == K.shape[0]) + + n_rep = 2 + K = sample_covariance_matrix(n_samples, random, n_rep) + assert_(K.shape == (n_rep * n_samples, n_rep * n_samples)) + assert_allclose(K.diagonal().mean(), 1.0) + assert_(matrix_rank(K) == K.shape[0]) + + +def test_variances(): + r0 = 0.1 + v0 = 0.5 + + v = create_variances(r0, v0) + assert_allclose(v.g + v.gxe + v.k + v.e + v.n, 1.0) + assert_allclose(v.e, v.n) + assert_allclose(v.n, v.k) + + has_kinship = False + v = create_variances(r0, v0, has_kinship) + assert_allclose(v.g + v.gxe + v.e + v.n, 1.0) + assert_allclose(v.e, v.n) + assert_equal(v.k, None) + + +def test_sample_persistent_effsizes(): + random = RandomState(0) + n_effects = 10 + causal_indices = [3, 5] + variance = 0.9 + + with pytest.raises(IndexError): + sample_persistent_effsizes(2, causal_indices, variance, random) + + beta = sample_persistent_effsizes(n_effects, causal_indices, variance, random) + assert_allclose(beta.mean(), 0.0, atol=1e-7) + assert_allclose((beta ** 2).sum(), variance) + + +def test_sample_gxe_effects(): + random = RandomState(0) + n_samples = 10 + n_snps = 30 + n_rep = 2 + maf_min = 0.2 + maf_max = 0.3 + mafs = sample_maf(n_snps, maf_min, maf_max, random) + + G = sample_genotype(n_samples, mafs, random) + G = tile(G, (n_rep, 1)) + G = column_normalize(G) + + E = random.randn(5, 5) + E = create_environment_matrix(E, n_samples, 
n_rep, 3, random) + E = column_normalize(E) + + causal_indices = [3, 5, 8] + variance = 0.9 + + y2 = sample_gxe_effects(G, E, causal_indices, variance, random) + assert_allclose(y2.mean(), 0.0, atol=1e-7) + assert_allclose(y2.var(), variance) + + +# TODO: put it back +# def test_sample_environment_effects(): +# random = RandomState(0) +# n_samples = 10 + +# E = random.randn(5, 5) +# E = create_environment_matrix(E, n_samples, 2, 3, random) +# E = column_normalize(E) + +# variance = 0.3 +# y3 = sample_environment_effects(E, variance, random) + +# assert_allclose(y3.mean(), 0.0, atol=1e-7) +# assert_allclose(y3.var(), variance) + + +# def test_sample_population_effects(): +# random = RandomState(0) +# n_samples = 10 +# variance = 0.4 +# n_rep = 1 +# K = sample_covariance_matrix(n_samples, random, n_rep) +# y4 = sample_population_effects(K, variance, random) + +# assert_allclose(y4.mean(), 0.0, atol=1e-7) +# assert_allclose(y4.var(), variance) + + +def test_sample_noise_effects(): + random = RandomState(0) + n_samples = 10 + + variance = 0.3 + y5 = sample_noise_effects(n_samples, variance, random) + + assert_allclose(y5.mean(), 0.0, atol=1e-7) + assert_allclose(y5.var(), variance) + + +def test_sample_phenotype(): + from numpy import corrcoef + + random = RandomState(0) + + r0 = 0.1 + v0 = 0.5 + v = create_variances(r0, v0) + + E = random.randn(30, 10) + + offset = 0.3 + n_samples = 500 + n_rep = 2 + n_env = 3 + s = sample_phenotype( + offset=offset, + E=E, + n_samples=n_samples, + n_snps=300, + n_rep=n_rep, + n_env=n_env, + maf_min=0.1, + maf_max=0.4, + g_causals=[3, 4], + gxe_causals=[4, 5], + variances=v, + random=random, + ) + assert_allclose( + s.y_g.var() + s.y_gxe.var() + s.y_n.var() + s.y_e.var() + s.y_k.var(), 1.0 + ) + assert_(abs(corrcoef(s.y_g, s.y_gxe)[0, 1]) < 0.1) + assert_(abs(corrcoef(s.y_g, s.y_n)[0, 1]) < 0.1) + assert_(abs(corrcoef(s.y_g, s.y_e)[0, 1]) < 0.1) + assert_(abs(corrcoef(s.y_g, s.y_k)[0, 1]) < 0.1) + + assert_(abs(corrcoef(s.y_gxe, 
s.y_n)[0, 1]) < 0.1) + assert_(abs(corrcoef(s.y_gxe, s.y_e)[0, 1]) < 0.1) + assert_(abs(corrcoef(s.y_gxe, s.y_k)[0, 1]) < 0.1) + + assert_(abs(corrcoef(s.y_n, s.y_e)[0, 1]) < 0.1) + assert_(abs(corrcoef(s.y_n, s.y_k)[0, 1]) < 0.1) + + assert_(abs(corrcoef(s.y_e, s.y_k)[0, 1]) < 0.1) + assert_allclose(s.y, offset + s.y_g + s.y_gxe + s.y_n + s.y_e + s.y_k) + assert_allclose(s.y.mean(), offset) + + assert_(s.E.shape[0] == n_samples * n_rep) + assert_(s.E.shape[1] == n_env) diff --git a/cellregmap/test/test_struct_lmm2.py b/cellregmap/test/test_struct_lmm2.py new file mode 100644 index 0000000..22aa2e6 --- /dev/null +++ b/cellregmap/test/test_struct_lmm2.py @@ -0,0 +1,451 @@ +from numpy import arange, asarray, eye, median, min, ones +from numpy.linalg import cholesky +from numpy.random import RandomState, default_rng +from numpy.testing import assert_, assert_allclose + +from cellregmap import create_variances, sample_phenotype, sample_phenotype_gxe + + +# from struct_lmm import StructLMM +# @pytest.mark.skip +def test_struct_lmm2_assoc(): + random = RandomState(0) + + n_samples = 50 + maf_min = 0.05 + maf_max = 0.45 + n_snps = 20 + n_rep = 1 + n_env = 2 + offset = 0.3 + r0 = 0.5 + v0 = 0.5 + g_causals = [5, 6] + gxe_causals = [10, 11] + + v = create_variances(r0, v0) + E = random.normal(size=[n_samples, n_env]) + + s = sample_phenotype( + offset=offset, + E=E, + n_samples=n_samples, + n_snps=n_snps, + n_rep=n_rep, + n_env=n_env, + maf_min=maf_min, + maf_max=maf_max, + g_causals=g_causals, + gxe_causals=gxe_causals, + variances=v, + random=random, + ) + + M = ones((n_samples, 1)) + # slmm = StructLMM(s.y.copy(), M=M, E=s.E, W=s.E) + # slmm.fit(verbose=False) + + # p_values0 = [] + # print() + # for i in range(n_snps): + # g = s.G[:, [i]] + # p = slmm.score_2dof_assoc(g) + # print("{}\t{}".format(i, p)) + # p_values0.append(p) + + slmm2 = StructLMM2(s.y, M, s.E) + slmm2.fit_null_association() + p_values1 = slmm2.scan_association(s.G) + # for i, pv in 
enumerate(p_values1): + # print("{}\t{}".format(i, pv)) + + +def test_struct_lmm2_inter(): + random = RandomState(3) + + n_samples = 280 + # n_samples = 500 + maf_min = 0.05 + maf_max = 0.45 + n_snps = 20 + n_rep = 1 + n_env = 4 + offset = 0.3 + r0 = 0.5 + v0 = 0.5 + g_causals = [5, 6] + gxe_causals = [10, 11] + E = random.normal(size=[n_samples, 2]) + + v = create_variances(r0, v0) + + s = sample_phenotype( + offset=offset, + E=E, + n_samples=n_samples, + n_snps=n_snps, + n_rep=n_rep, + n_env=n_env, + maf_min=maf_min, + maf_max=maf_max, + g_causals=g_causals, + gxe_causals=gxe_causals, + variances=v, + random=random, + ) + + M = ones((n_samples, 1)) + + # p_values0 = [] + # for i in range(n_snps): + # g = s.G[:, [i]] + # Mg = concatenate([M, g], axis=1) + # slmm_int = StructLMM(s.y.copy(), M=Mg, E=s.E, W=s.E) + # slmm_int.fit(verbose=False) + # p = slmm_int.score_2dof_inter(g) + # print("{}\t{}".format(i, p)) + # p_values0.append(p) + + # TODO: add kinship to test it properly + slmm2 = StructLMM2(s.y, M, s.E) + pvals = slmm2.scan_interaction(s.G) + + causal_pvalues = [pvals[i] for i in range(len(pvals)) if i in gxe_causals] + noncausal_pvalues = [pvals[i] for i in range(len(pvals)) if i not in gxe_causals] + causal_pvalues = asarray(causal_pvalues) + noncausal_pvalues = asarray(noncausal_pvalues) + + assert_(all(causal_pvalues < 1e-7)) + assert_(all(noncausal_pvalues > 1e-3)) + + +def test_struct_lmm2_inter_kinship(): + random = RandomState(8) + + n_samples = 346 + maf_min = 0.05 + maf_max = 0.45 + n_snps = 20 + n_rep = 1 + n_env = 3 + offset = 0.3 + r0 = 0.5 + v0 = 0.5 + g_causals = [5, 6] + gxe_causals = [10, 11] + E = random.normal(size=[n_samples, 2]) + + v = create_variances(r0, v0) + + s = sample_phenotype( + offset=offset, + E=E, + n_samples=n_samples, + n_snps=n_snps, + n_rep=n_rep, + n_env=n_env, + maf_min=maf_min, + maf_max=maf_max, + g_causals=g_causals, + gxe_causals=gxe_causals, + variances=v, + random=random, + ) + + M = ones((n_samples, 1)) + + 
G_kinship = cholesky(s.K + eye(n_samples) * 1e-7) + slmm2 = StructLMM2(s.y, M, s.E, G_kinship) + pvals = slmm2.scan_interaction(s.G) + + causal_pvalues = [pvals[i] for i in range(len(pvals)) if i in gxe_causals] + noncausal_pvalues = [pvals[i] for i in range(len(pvals)) if i not in gxe_causals] + causal_pvalues = asarray(causal_pvalues) + noncausal_pvalues = asarray(noncausal_pvalues) + + assert_(all(causal_pvalues < 1e-7)) + assert_(all(noncausal_pvalues > 1e-3)) + + +def test_struct_lmm2_inter_kinship_permute(): + random = RandomState(2) + + n_samples = 500 + maf_min = 0.05 + maf_max = 0.45 + n_snps = 20 + n_rep = 1 + n_env = 3 + offset = 0.3 + r0 = 0.5 + v0 = 0.5 + g_causals = [5, 6] + gxe_causals = [10, 11] + E = random.normal(size=[n_samples, 2]) + + v = create_variances(r0, v0) + + s = sample_phenotype( + offset=offset, + E=E, + n_samples=n_samples, + n_snps=n_snps, + n_rep=n_rep, + n_env=n_env, + maf_min=maf_min, + maf_max=maf_max, + g_causals=g_causals, + gxe_causals=gxe_causals, + variances=v, + random=random, + ) + + M = ones((n_samples, 1)) + + G_kinship = cholesky(s.K + eye(n_samples) * 1e-7) + slmm2 = StructLMM2(s.y, M, s.E, G_kinship) + random = RandomState(1) + idx = random.permutation(s.E.shape[0]) + pvals = slmm2.scan_interaction(s.G, idx_E=idx) + assert_(median(pvals) > 0.3) + assert_(min(pvals) > 0.04) + + +def test_struct_lmm2_inter_kinship_repetition(): + import numpy as np + + random = default_rng(20) + + n_individuals = 100 + # n_individuals = 200 + # n_individuals = 500 + # n_individuals = 250 + maf_min = 0.40 + # maf_min = 0.20 + # maf_min = 0.05 + maf_max = 0.45 + # n_snps = 250 + n_snps = 100 + # n_snps = 50 + # n_snps = 20 + # n_cells = 100 + # n_cells = 10 + # n_cells = 2 + n_cells = arange(n_individuals) + 1 + # n_cells = 1 + n_env = 2 + n_env_groups = 3 + offset = 0.3 + r0 = 0.5 + v0 = 0.5 + g_causals = [5, 6] + gxe_causals = [10, 11] + + v = create_variances(r0, v0) + + s = sample_phenotype( + offset=offset, + 
n_individuals=n_individuals, + n_snps=n_snps, + n_cells=n_cells, + n_env=n_env, + n_env_groups=n_env_groups, + maf_min=maf_min, + maf_max=maf_max, + g_causals=g_causals, + gxe_causals=gxe_causals, + variances=v, + random=random, + ) + + slmm2 = StructLMM2(s.y, s.M, s.E, s.Lk) + # idx = random.permutation(s.E.shape[0]) + # pvals = slmm2.scan_interaction(s.G, idx_E=idx) + from scipy.stats.stats import pearsonr + + corr = {} + corr_pvals = {} + + corr["y_e"] = [pearsonr(s.y_e, s.G[:, i])[0] for i in range(s.G.shape[1])] + corr_pvals["y_e"] = [pearsonr(s.y_e, s.G[:, i])[1] for i in range(s.G.shape[1])] + + corr["y_n"] = [pearsonr(s.y_n, s.G[:, i])[0] for i in range(s.G.shape[1])] + corr_pvals["y_n"] = [pearsonr(s.y_n, s.G[:, i])[1] for i in range(s.G.shape[1])] + + pvals, info = slmm2.scan_interaction(s.G) + print(pearsonr(pvals, np.abs(corr["y_e"]))) + print(pearsonr(pvals, np.abs(corr["y_n"]))) + assert_(median(pvals) > 0.3) + assert_(min(pvals) > 0.04) + + +def test_struct_lmm2_inter_kinship_repetition_gxe(): + import numpy as np + + random = default_rng(20) + + n_individuals = 100 + # n_individuals = 200 + # n_individuals = 500 + # n_individuals = 250 + maf_min = 0.40 + # maf_min = 0.20 + # maf_min = 0.05 + maf_max = 0.45 + # n_snps = 250 + n_snps = 100 + # n_snps = 50 + # n_snps = 20 + # n_cells = 100 + # n_cells = 10 + # n_cells = 2 + n_cells = arange(n_individuals) + 1 + # n_cells = 1 + n_env = 2 + n_env_groups = 3 + offset = 0.3 + r0 = 0.5 + v0 = 0.5 + g_causals = [5, 6] + gxe_causals = [10, 11] + + v = create_variances(r0, v0) + + # Timing: + # - n_individuals + # - n_env_groups + # - n_samples + + s = sample_phenotype_gxe( + offset=offset, + n_individuals=n_individuals, + n_snps=n_snps, + n_cells=n_cells, + n_env=n_env, + n_env_groups=n_env_groups, + maf_min=maf_min, + maf_max=maf_max, + g_causals=g_causals, + gxe_causals=gxe_causals, + variances=v, + random=random, + ) + + slmm2 = StructLMM2(s.y, s.M, s.E, s.Ls) + # idx = random.permutation(s.E.shape[0]) + # 
pvals = slmm2.scan_interaction(s.G, idx_E=idx) + from scipy.stats.stats import pearsonr + + corr = {} + corr_pvals = {} + + corr["y_e"] = [pearsonr(s.y_e, s.G[:, i])[0] for i in range(s.G.shape[1])] + corr_pvals["y_e"] = [pearsonr(s.y_e, s.G[:, i])[1] for i in range(s.G.shape[1])] + + corr["y_n"] = [pearsonr(s.y_n, s.G[:, i])[0] for i in range(s.G.shape[1])] + corr_pvals["y_n"] = [pearsonr(s.y_n, s.G[:, i])[1] for i in range(s.G.shape[1])] + + pvals, info = slmm2.scan_interaction(s.G) + print(pearsonr(pvals, np.abs(corr["y_e"]))) + print(pearsonr(pvals, np.abs(corr["y_n"]))) + assert_(median(pvals) > 0.3) + assert_(min(pvals) > 0.04) + + +def test_struct_lmm2_inter_kinship_predict(): + random = RandomState(0) + + n_individuals = 100 + maf_min = 0.05 + maf_max = 0.45 + n_snps = 20 + n_cells = 2 + n_env_groups = 3 + offset = 0.3 + r0 = 0.5 + v0 = 0.5 + g_causals = [5, 6] + gxe_causals = [10, 11] + + v = create_variances(r0, v0) + + s = sample_phenotype_gxe( + offset=offset, + n_individuals=n_individuals, + n_snps=n_snps, + n_cells=n_cells, + n_env_groups=n_env_groups, + maf_min=maf_min, + maf_max=maf_max, + g_causals=g_causals, + gxe_causals=gxe_causals, + variances=v, + random=random, + ) + + slmm2 = StructLMM2(s.y, s.M, s.E, s.Ls) + beta_g_s, beta_gxe_s = slmm2.predict_interaction(s.G, s.mafs) + assert_allclose(beta_g_s[3], -0.07720025290188615) + assert_allclose(beta_g_s[-1], 0.022415332334122483) + assert_allclose(beta_gxe_s[1, 1], 0.010062608120425824) + assert_allclose(beta_gxe_s[13, -1], -0.05566938579548831) + + +def test_struct_lmm2_estimate_aggregate_environment(): + random = default_rng(20) + + n_individuals = 100 + # n_individuals = 200 + # n_individuals = 500 + # n_individuals = 250 + maf_min = 0.40 + # maf_min = 0.20 + # maf_min = 0.05 + maf_max = 0.45 + # n_snps = 250 + n_snps = 100 + # n_snps = 50 + # n_snps = 20 + # n_cells = 100 + # n_cells = 10 + n_cells = 2 + # n_cells = arange(n_individuals) + 1 + # n_cells = 1 + n_env_groups = 3 + offset = 0.3 + 
r0 = 0.5 + v0 = 0.5 + g_causals = [5, 6] + gxe_causals = [10, 11] + + v = create_variances(r0, v0) + + # Timing: + # - n_individuals + # - n_env_groups + # - n_samples + + s = sample_phenotype_gxe( + offset=offset, + n_individuals=n_individuals, + n_snps=n_snps, + n_cells=n_cells, + n_env_groups=n_env_groups, + maf_min=maf_min, + maf_max=maf_max, + g_causals=g_causals, + gxe_causals=gxe_causals, + variances=v, + random=random, + ) + + slmm2 = StructLMM2(s.y, s.M, s.E, s.Ls) + import numpy as np + for i in range(5): + a = slmm2.estimate_aggregate_environment(s.G[:, i]) + print(np.var(a)) + b = slmm2.estimate_aggregate_environment(s.G[:, 10]) + c = slmm2.estimate_aggregate_environment(s.G[:, 11]) + print(np.var(b)) + print(np.var(c)) + pass diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..566feac --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,61 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. 
For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'CellRegMap' +copyright = '2022, Anna Cuomo' +author = 'Anna Cuomo' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", +] + +autodoc_default_flags = ["members"] +autosummary_generate = True +napoleon_numpy_docstring = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +pygments_style = "default" +html_theme = "sphinx_rtd_theme" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ['_static'] diff --git a/docs/functions.rst b/docs/functions.rst new file mode 100644 index 0000000..191bcce --- /dev/null +++ b/docs/functions.rst @@ -0,0 +1,15 @@ +********** +CellRegMap +********** + +.. currentmodule:: cellregmap + +.. autosummary:: + :toctree: _autosummary + :template: class.rst + + CellRegMap + run_association + run_interaction + estimate_betas + # lrt_pvalues diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..4610cec --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,39 @@ +.. CellRegMap documentation master file, created by + sphinx-quickstart on Mon Jan 10 23:46:16 2022. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to CellRegMap's documentation! +====================================== + +What is CellRegMap +------------------ + +The cellular regulatory map (``cellregmap``) is a statistical framework to map context-specific effects of genetic variants on single-cell gene expression. +It builds on a linear mixed model and is implemented in Python. + +What do you need to run CellRegMap +---------------------------------- + +Single-cell expression profiles assayed through scRNA-seq in the form of a count matrix (cells x genes) + +Cellular contexts: these can be known factors or a latent representation of the space (e.g., PCs by cells) + +Genotypes: CellRegMap tests for individual eQTL effects between a genetic variant and a gene's expression, so genotypes need to be provided for variants of interest. + +A kinship matrix accounting for relatedness among samples. + +.. 
toctree:: + :maxdepth: 2 + :caption: Contents: + + install + functions + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/install.rst b/docs/install.rst new file mode 100644 index 0000000..cb55400 --- /dev/null +++ b/docs/install.rst @@ -0,0 +1,26 @@ +****************** +Install CellRegMap +****************** + +Stable version +-------------- + +``cellregmap`` can be installed `from PyPI `_ with ``pip``: +:: + python3 -m pip install cellregmap + + +Development version +------------------- + +To install ``cellregmap`` in development mode, install it from the `GitHub repository `_: +:: + git clone https://github.com/limix/CellRegMap.git + cd CellRegMap + python3 -m pip install -e . + + +Docker installation +------------------- + +If you use Docker, you can also pull the `pre-built image from dockerhub `_ diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..153be5e --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..3bcf96c --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,3 @@ +sphinx>=2.1.2 +# cellregmap +git+https://github.com/limix/CellRegMap.git diff --git a/get_data.py b/get_data.py deleted file mode 100644 index 8b532d0..0000000 --- a/get_data.py +++ /dev/null @@ -1,33 +0,0 @@ -import pandas as pd -import limix - -def get_genotype_data(geno_prefix): - bim,fam,bed = limix.io.plink.read(geno_prefix, verbose = False) - fam.set_index('iid', inplace = True) - return bim,fam,bed - -''' -given a df of phenotype vectors df_y -and a sample mapping file -mapping genotype samples (e.g. individuals) to phenotype samples (e.g. cells) -returns genotype samples corresponding to the samples in y -''' -def map_samples(df_y, sample_mapping): - df0 = pd.read_csv(sample_mapping, sep = " ") - df0 = df0.set_index("phenotype_sample") - return df0.loc[df_y.index, "genotype_sample"] - -''' -given a df of phenotype vectors df_y -and a kinship matrix -returns kinship only containing samples included in phenotype -with the possibility of mapping samples first -''' -def filter_kinship(K, df_y, sample_mapping = None): - if sample_mapping is not None: - samples = map_samples(df_y, sample_mapping) - else: samples = df_y.index - K_filtered = K[samples,:][:,samples] - return K_filtered - -kinship_mat = kinship_df.loc[individual_ids,individual_ids].values if kinship_df is not None else None \ No newline at end of file diff --git a/proof.md b/proof.md new file mode 100644 index 0000000..142ee3e --- /dev/null +++ b/proof.md @@ -0,0 +1,79 @@ +K = GGt, Sigma = EEt +M = K*Sigma + +Let & be the Hadamard product. 
+ +We know that: + + rank(A&B) <= rank(A) * rank(B) + +Let + + K = U @ Dk @ Ut + Sigma = V @ Ds @ Vt + +for diagonal matrices Dk and Ds. + +K & Sigma = K & (Sum_i vi * lambda_i * vi^T ) = Sum_i (K & (vi @ lambda_i @ vi^T)) + = Sum_i ( Lki @ Lki^T ) + +Let e = |1 1 ... 1|. We have + + K & (vi @ lambda_i @ vi^T) = K & (ui @ ui^T) + = K & (Diag(ui) @ e @ e^T @ Diag(ui)) + = Diag(ui) @ (K & (e @ e^T)) @ Diag(ui) (See (2.10), [1]) + = Diag(ui) @ K @ Diag(ui) + = Diag(ui) @ G @ Gt @ Diag(ui) + = Lki @ Lki^T + +for Lki = Diag(ui) @ G. + +Trial +----- + +```python + import numpy as np + + K = np.random.randn(3, 3) + K = K @ K.T + + # rank-2 symmetric matrix + u0 = np.random.randn(3, 1) + u1 = np.random.randn(3, 1) + Sigma = u0 @ u0.T + u1 @ u1.T + sigma, U = np.linalg.eigh(Sigma) + + tmp = sigma[1] * U[:, [1]] @ U[:, [1]].T + sigma[2] * U[:, [2]] @ U[:, [2]].T + # Close to zero + print(tmp - Sigma) + + u1 = np.sqrt(sigma[1]) * U[:, [1]] + u2 = np.sqrt(sigma[2]) * U[:, [2]] + + tmp = u1 @ u1.T + u2 @ u2.T - Sigma + # Close to zero + print(tmp) + + e = np.ones(3) + + G = np.linalg.cholesky(K) + + Lk1 = np.diag(u1.ravel()) @ G + Lk2 = np.diag(u2.ravel()) @ G + + # Close to zero + print(Lk1 @ Lk1.T + Lk2 @ Lk2.T - K * Sigma) +``` + +Goal +---- + +- tr(K & Sigma) (there is a property) +- det(K & Sigma) +- (K & Sigma)^{-1} @ x + +References +---------- + +[1] Hadamard Products and Multivariate Statistical Analysis. 
+ https://core.ac.uk/download/pdf/82272966.pdf diff --git a/greater power and computational efficiency.pdf.bz2 b/references/greater power and computational efficiency.pdf.bz2 similarity index 100% rename from greater power and computational efficiency.pdf.bz2 rename to references/greater power and computational efficiency.pdf.bz2 diff --git a/rachel thesis part 1.pdf.bz2 b/references/rachel thesis part 1.pdf.bz2 similarity index 100% rename from rachel thesis part 1.pdf.bz2 rename to references/rachel thesis part 1.pdf.bz2 diff --git a/run_structLMM2.py b/run_structLMM2.py deleted file mode 100644 index cbbe4f5..0000000 --- a/run_structLMM2.py +++ /dev/null @@ -1,188 +0,0 @@ -def _mean_standardize(X, axis = None): - from numpy_sugar import epsilon - from numpy import inf - - X = X - np.nanmean(X,0) - X = X / np.nanstd(X,0) - - return X - -def get_genotype_data(geno_prefix): - bim,fam,bed = limix.io.plink.read(geno_prefix, verbose = False) - fam.set_index('iid', inplace = True) - return bim,fam,bed - -def run_structLMM_QTL_analysis_load_intersect_phenotype_environments_covariates_kinship_sample_mapping\ - (pheno_filename, anno_filename, env_filename, geno_prefix, plinkGenotype, - cis_mode = True, interaction_mode = True, - relatedness_score = 0.95, snps_filename = None, feature_filename = None, - snp_feature_filename = None, selection = 'all', covariates_filename = None, kinship_filename = None, - sample_mapping_filename = None, feature_variant_covariate_filename = None): - selectionStart = None - selectionEnd = None - if(":" in selection): - parts = selection.split(":") - if("-" not in parts[1]): - print("No correct sub selection.") - print("Given in: "+selection) - print("Expected format: (chr number):(start location)-(stop location)") - sys.exit() - chromosome = parts[0] - if("-" in parts[1]): - parts2 = parts[1].split("-") - selectionStart = int(parts2[0]) - selectionEnd = int(parts2[1]) - else : - chromosome = selection - - ''' function to take input and 
intersect sample and genotype.''' - #Load input data files & filter for relevant data - #Load input data filesf - - phenotype_df = qtl_loader_utils.get_phenotype_df(pheno_filename) - annotation_df = qtl_loader_utils.get_annotation_df(anno_filename) - - if(plinkGenotype): - bim,fam,bed = get_genotype_data(geno_prefix) - annotation_df.replace(['X', 'Y', 'XY', 'MT'], ['23', '24', '25', '26'], inplace = True) - if chromosome == 'X' : - chromosome = '23' - elif chromosome == 'Y': - chromosome = '24' - elif chromosome == 'XY': - chromosome = '25' - elif chromosome == 'MT': - chromosome = '26' - - - else : - geno_prefix+='.bgen' - print(geno_prefix) - print("Intersecting data.") - - if(annotation_df.shape[0] != annotation_df.groupby(annotation_df.index).first().shape[0]): - print("Only one location per feature supported. If multiple locations are needed please look at: --extended_anno_file") - sys.exit() - - ##Make sure that there is only one entry per feature id!. - sample2individual_df = qtl_loader_utils.get_samplemapping_df(sample_mapping_filename, list(phenotype_df.columns), 'sample') - sample2individual_df['sample'] = sample2individual_df.index - sample2individual_df = sample2individual_df.drop_duplicates(); - - - ##Filter first the linking files! - #Subset linking to relevant genotypes. - orgSize = sample2individual_df.shape[0] - sample2individual_df = sample2individual_df.loc[sample2individual_df['iid'].map(lambda x: x in list(map(str, fam.index))),:] - diff = orgSize - sample2individual_df.shape[0] - orgSize = sample2individual_df.shape[0] - print("Dropped: "+str(diff)+" samples because they are not present in the genotype file.") - - #Subset linking to relevant phenotypes. 
- sample2individual_df = sample2individual_df.loc[np.intersect1d(sample2individual_df.index,phenotype_df.columns),:] - diff = orgSize- sample2individual_df.shape[0] - orgSize = sample2individual_df.shape[0] - print("Dropped: " + str(diff) + " samples because they are not present in the phenotype file.") - #Subset linking vs kinship. - kinship_df = qtl_loader_utils.get_kinship_df(kinship_filename) - if kinship_df is not None: - #Filter from individual2sample_df & sample2individual_df since we don't want to filter from the genotypes. - sample2individual_df = sample2individual_df[sample2individual_df['iid'].map(lambda x: x in list(map(str, kinship_df.index)))] - diff = orgSize - sample2individual_df.shape[0] - orgSize = sample2individual_df.shape[0] - print("Dropped: " + str(diff) + " samples because they are not present in the kinship file.") - #Subset linking vs covariates. - covariate_df = qtl_loader_utils.get_covariate_df(covariates_filename) - if covariate_df is not None: - if np.nansum(covariate_df == 1, 0).max() < covariate_df.shape[0]: covariate_df.insert(0, 'ones',np.ones(covariate_df.shape[0])) - sample2individual_df = sample2individual_df.loc[list(set(sample2individual_df.index) & set(covariate_df.index)),:] - diff = orgSize - sample2individual_df.shape[0] - orgSize = sample2individual_df.shape[0] - print("Dropped: " + str(diff) + " samples because they are not present in the covariate file.") - #Subset linking vs environments. 
- environment_df = qtl_loader_utils.get_env_df(env_filename) - # import pdb; pdb.set_trace() - # if np.nansum(environment_df == 1, 0).max() < environment_df.shape[0]: environment_df.insert(0, 'ones',np.ones(environment_df.shape[0])) - sample2individual_df = sample2individual_df.loc[list(set(sample2individual_df.index) & set(environment_df.index)),:] - diff = orgSize - sample2individual_df.shape[0] - orgSize = sample2individual_df.shape[0] - print("Dropped: " + str(diff) + " samples because they are not present in the environment file.") - - ### - print("Number of samples with genotype & phenotype data: " + str(sample2individual_df.shape[0])) - if(sample2individual_df.shape[0] < minimum_test_samples): - print("Not enough samples with both genotype & phenotype data.") - sys.exit() - - ##Filter now the actual data! - #Filter phenotype data based on the linking files. - phenotype_df = phenotype_df.loc[list(set(phenotype_df.index) & set(annotation_df.index)), sample2individual_df.index.values] - - #Filter kinship data based on the linking files. - genetically_unique_individuals = None - if kinship_df is not None: - kinship_df = kinship_df.loc[np.intersect1d(kinship_df.index, sample2individual_df['iid']), np.intersect1d(kinship_df.index, sample2individual_df['iid'])] - genetically_unique_individuals = utils.get_unique_genetic_samples(kinship_df, relatedness_score); - - #Filter covariate data based on the linking files. - if covariate_df is not None: - covariate_df = covariate_df.loc[np.intersect1d(covariate_df.index, sample2individual_df.index.values),:] - - snp_feature_filter_df = qtl_loader_utils.get_snp_feature_df(snp_feature_filename) - try: - feature_filter_df = qtl_loader_utils.get_snp_df(feature_filename) - except: - if feature_filename is not None: - feature_filter_df = pd.DataFrame(index = feature_filename) - #Do filtering on features. 
- if feature_filter_df is not None: - phenotype_df = phenotype_df.loc[feature_filter_df.index,:] - ##Filtering on features to test. - if snp_feature_filter_df is not None: - phenotype_df = phenotype_df.loc[np.unique(snp_feature_filter_df['feature']),:] - ##Filtering on features to test from the combined feature snp filter. - - if ((not cis_mode) and len(set(bim['chrom'])) < 22) : - print("Warning, running a trans-analysis on snp data from less than 22 chromosomes.\nTo merge data later the permutation P-values need to be written out.") - - if(cis_mode): - #Remove features from the annotation that are on chromosomes which are not present anyway. - annotation_df = annotation_df[np.in1d(annotation_df['chromosome'], list(set(bim['chrom'])))] - - #Prepare to filter on snps. - snp_filter_df = qtl_loader_utils.get_snp_df(snps_filename) - if snp_filter_df is not None: - toSelect = set(snp_filter_df.index).intersection(set(bim['snp'])) - bim = bim.loc[bim['snp'].isin(toSelect)] - ##Filtering on SNPs to test from the snp filter. - - if snp_feature_filter_df is not None: - toSelect = set(np.unique(snp_feature_filter_df['snp_id'])).intersection(set(bim['snp'])) - bim = bim.loc[bim['snp'].isin(toSelect)] - ##Filtering on features to test from the combined feature snp filter. 
- - - #Determine features to be tested - if chromosome=='all': - feature_list = list(set(annotation_df.index) & set(phenotype_df.index)) - else: - if not selectionStart is None : - lowest = min([selectionStart,selectionEnd]) - highest = max([selectionStart,selectionEnd]) - annotation_df['mean'] = ((annotation_df["start"] + annotation_df["end"])/2) - feature_list = list(set(annotation_df.iloc[(annotation_df['chromosome'].values == chromosome) & (annotation_df['mean'].values >= lowest) & (annotation_df["mean"].values < highest)].index.values) & set(phenotype_df.index)) - del annotation_df['mean'] - else : - feature_list = list(set(annotation_df[annotation_df['chromosome'] == chromosome].index) & set(phenotype_df.index)) - - print("Number of features to be tested: " + str(len(feature_list))) - print("Total number of variants to be considered, before variante QC and feature intersection: " + str(bim.shape[0])) - - - feature_variant_covariate_df = qtl_loader_utils.get_snp_feature_df(feature_variant_covariate_filename) - return [phenotype_df, kinship_df, covariate_df, environment_df, sample2individual_df, complete_annotation_df, annotation_df, snp_filter_df, snp_feature_filter_df, genetically_unique_individuals, minimum_test_samples, feature_list,bim,fam,bed, chromosome, selectionStart, selectionEnd, feature_variant_covariate_df] - - - - - diff --git a/run_structLMM2_int.py b/run_structLMM2_int.py deleted file mode 100644 index 5d60962..0000000 --- a/run_structLMM2_int.py +++ /dev/null @@ -1,381 +0,0 @@ - -import sys -sys.path.insert(0,'/hps/nobackup/stegle/users/mjbonder/tools/hipsci_pipeline/limix_QTL_pipeline/') - -# import glob -# import os -# # from subprocess import run -# import pandas as pd -# import re -# from os.path import join -# import scipy as sp -import numpy as np -# # from struct_lmm.interpretation import PredictGenEffect -from sklearn.preprocessing import Imputer -import limix -# import qtl_output -import qtl_loader_utils -# import qtl_parse_args -# 
import qtl_utilities as utils -# from bgen_reader import read_bgen -# from numpy import linalg as LA -# import scipy.linalg as la -from tqdm import tqdm - -from limix_core.covar import FreeFormCov -from limix_core.gp import GP2KronSumLR - -def _mean_standardize(X, axis = None): - from numpy_sugar import epsilon - from numpy import inf - - X = X - np.nanmean(X,0) - X = X / np.nanstd(X,0) - - return X - -def get_genotype_data(geno_prefix): - bim,fam,bed = limix.io.plink.read(geno_prefix, verbose = False) - fam.set_index('iid', inplace = True) - return bim,fam,bed - -def run_structLMM2_int_load_intersect_phenotype_environments_covariates_kinship_sample_mapping\ - (pheno_filename, anno_filename, env_filename, geno_prefix, plinkGenotype, - cis_mode = True, interaction_mode = True, - relatedness_score = 0.95, snps_filename = None, feature_filename = None, - snp_feature_filename = None, selection = 'all', covariates_filename = None, kinship_filename = None, - sample_mapping_filename = None, feature_variant_covariate_filename = None): - selectionStart = None - selectionEnd = None - if(":" in selection): - parts = selection.split(":") - if("-" not in parts[1]): - print("No correct sub selection.") - print("Given in: "+selection) - print("Expected format: (chr number):(start location)-(stop location)") - sys.exit() - chromosome = parts[0] - if("-" in parts[1]): - parts2 = parts[1].split("-") - selectionStart = int(parts2[0]) - selectionEnd = int(parts2[1]) - else : - chromosome = selection - - ''' function to take input and intersect sample and genotype.''' - #Load input data files & filter for relevant data - #Load input data filesf - - phenotype_df = qtl_loader_utils.get_phenotype_df(pheno_filename) - annotation_df = qtl_loader_utils.get_annotation_df(anno_filename) - - if(plinkGenotype): - bim,fam,bed = get_genotype_data(geno_prefix) - annotation_df.replace(['X', 'Y', 'XY', 'MT'], ['23', '24', '25', '26'], inplace = True) - if chromosome == 'X' : - chromosome = '23' - 
elif chromosome == 'Y': - chromosome = '24' - elif chromosome == 'XY': - chromosome = '25' - elif chromosome == 'MT': - chromosome = '26' - - - else : - geno_prefix+='.bgen' - print(geno_prefix) - print("Intersecting data.") - - if(annotation_df.shape[0] != annotation_df.groupby(annotation_df.index).first().shape[0]): - print("Only one location per feature supported. If multiple locations are needed please look at: --extended_anno_file") - sys.exit() - - ##Make sure that there is only one entry per feature id!. - sample2individual_df = qtl_loader_utils.get_samplemapping_df(sample_mapping_filename, list(phenotype_df.columns), 'sample') - sample2individual_df['sample'] = sample2individual_df.index - sample2individual_df = sample2individual_df.drop_duplicates(); - - - ##Filter first the linking files! - #Subset linking to relevant genotypes. - orgSize = sample2individual_df.shape[0] - sample2individual_df = sample2individual_df.loc[sample2individual_df['iid'].map(lambda x: x in list(map(str, fam.index))),:] - diff = orgSize - sample2individual_df.shape[0] - orgSize = sample2individual_df.shape[0] - print("Dropped: "+str(diff)+" samples because they are not present in the genotype file.") - - #Subset linking to relevant phenotypes. - sample2individual_df = sample2individual_df.loc[np.intersect1d(sample2individual_df.index,phenotype_df.columns),:] - diff = orgSize- sample2individual_df.shape[0] - orgSize = sample2individual_df.shape[0] - print("Dropped: " + str(diff) + " samples because they are not present in the phenotype file.") - #Subset linking vs kinship. - kinship_df = qtl_loader_utils.get_kinship_df(kinship_filename) - if kinship_df is not None: - #Filter from individual2sample_df & sample2individual_df since we don't want to filter from the genotypes. 
- sample2individual_df = sample2individual_df[sample2individual_df['iid'].map(lambda x: x in list(map(str, kinship_df.index)))] - diff = orgSize - sample2individual_df.shape[0] - orgSize = sample2individual_df.shape[0] - print("Dropped: " + str(diff) + " samples because they are not present in the kinship file.") - #Subset linking vs covariates. - covariate_df = qtl_loader_utils.get_covariate_df(covariates_filename) - if covariate_df is not None: - if np.nansum(covariate_df == 1, 0).max() < covariate_df.shape[0]: covariate_df.insert(0, 'ones',np.ones(covariate_df.shape[0])) - sample2individual_df = sample2individual_df.loc[list(set(sample2individual_df.index) & set(covariate_df.index)),:] - diff = orgSize - sample2individual_df.shape[0] - orgSize = sample2individual_df.shape[0] - print("Dropped: " + str(diff) + " samples because they are not present in the covariate file.") - #Subset linking vs environments. - environment_df = qtl_loader_utils.get_env_df(env_filename) - # import pdb; pdb.set_trace() - # if np.nansum(environment_df == 1, 0).max() < environment_df.shape[0]: environment_df.insert(0, 'ones',np.ones(environment_df.shape[0])) - sample2individual_df = sample2individual_df.loc[list(set(sample2individual_df.index) & set(environment_df.index)),:] - diff = orgSize - sample2individual_df.shape[0] - orgSize = sample2individual_df.shape[0] - print("Dropped: " + str(diff) + " samples because they are not present in the environment file.") - - ### - print("Number of samples with genotype & phenotype data: " + str(sample2individual_df.shape[0])) - - - ##Filter now the actual data! - #Filter phenotype data based on the linking files. - phenotype_df = phenotype_df.loc[list(set(phenotype_df.index) & set(annotation_df.index)), sample2individual_df.index.values] - - #Filter kinship data based on the linking files. 
- genetically_unique_individuals = None - if kinship_df is not None: - kinship_df = kinship_df.loc[np.intersect1d(kinship_df.index, sample2individual_df['iid']), np.intersect1d(kinship_df.index, sample2individual_df['iid'])] - genetically_unique_individuals = utils.get_unique_genetic_samples(kinship_df, relatedness_score); - - #Filter covariate data based on the linking files. - if covariate_df is not None: - covariate_df = covariate_df.loc[np.intersect1d(covariate_df.index, sample2individual_df.index.values),:] - - snp_feature_filter_df = qtl_loader_utils.get_snp_feature_df(snp_feature_filename) - try: - feature_filter_df = qtl_loader_utils.get_snp_df(feature_filename) - except: - if feature_filename is not None: - feature_filter_df = pd.DataFrame(index = feature_filename) - #Do filtering on features. - if feature_filter_df is not None: - phenotype_df = phenotype_df.loc[feature_filter_df.index,:] - ##Filtering on features to test. - if snp_feature_filter_df is not None: - phenotype_df = phenotype_df.loc[np.unique(snp_feature_filter_df['feature']),:] - ##Filtering on features to test from the combined feature snp filter. - - if ((not cis_mode) and len(set(bim['chrom'])) < 22) : - print("Warning, running a trans-analysis on snp data from less than 22 chromosomes.\nTo merge data later the permutation P-values need to be written out.") - - if(cis_mode): - #Remove features from the annotation that are on chromosomes which are not present anyway. - annotation_df = annotation_df[np.in1d(annotation_df['chromosome'], list(set(bim['chrom'])))] - - #Prepare to filter on snps. - snp_filter_df = qtl_loader_utils.get_snp_df(snps_filename) - if snp_filter_df is not None: - toSelect = set(snp_filter_df.index).intersection(set(bim['snp'])) - bim = bim.loc[bim['snp'].isin(toSelect)] - ##Filtering on SNPs to test from the snp filter. 
- - if snp_feature_filter_df is not None: - toSelect = set(np.unique(snp_feature_filter_df['snp_id'])).intersection(set(bim['snp'])) - bim = bim.loc[bim['snp'].isin(toSelect)] - ##Filtering on features to test from the combined feature snp filter. - - - #Determine features to be tested - if chromosome=='all': - feature_list = list(set(annotation_df.index) & set(phenotype_df.index)) - else: - if not selectionStart is None : - lowest = min([selectionStart,selectionEnd]) - highest = max([selectionStart,selectionEnd]) - annotation_df['mean'] = ((annotation_df["start"] + annotation_df["end"])/2) - feature_list = list(set(annotation_df.iloc[(annotation_df['chromosome'].values == chromosome) & (annotation_df['mean'].values >= lowest) & (annotation_df["mean"].values < highest)].index.values) & set(phenotype_df.index)) - del annotation_df['mean'] - else : - feature_list = list(set(annotation_df[annotation_df['chromosome'] == chromosome].index) & set(phenotype_df.index)) - - print("Number of features to be tested: " + str(len(feature_list))) - print("Total number of variants to be considered, before variante QC and feature intersection: " + str(bim.shape[0])) - - - feature_variant_covariate_df = qtl_loader_utils.get_snp_feature_df(feature_variant_covariate_filename) - return [phenotype_df, kinship_df, covariate_df, environment_df, sample2individual_df, complete_annotation_df, annotation_df, snp_filter_df, snp_feature_filter_df, genetically_unique_individuals, minimum_test_samples, feature_list,bim,fam,bed, chromosome, selectionStart, selectionEnd, feature_variant_covariate_df] - - -import struct_lmm2 as StructLMM2 - -def run_structLMM2_int(pheno_filename, anno_filename, env_filename, geno_prefix, output_dir, plinkGenotype = True, - window_size = 250000, min_maf = 0.05, min_hwe_P = 0.001, min_call_rate = 0.95, - interaction_mode = True, cis_mode = True, gaussianize_method = None, - seed = np.random.randint(40000), relatedness_score = 0.95, - feature_variant_covariate_filename 
= None, snps_filename = None, feature_filename = None, - snp_feature_filename = None, genetic_range = 'all', covariates_filename = None, - kinship_filename = None, sample_mapping_filename = None): - fill_NaN = Imputer(missing_values = np.nan, strategy = 'mean', axis = 0) - - [phenotype_df, kinship_df, covariate_df, environment_df, sample2individual_df, annotation_df, snp_filter_df, - snp_feature_filter_df, feature_list, bim, fam, bed, - chromosome, selectionStart, selectionEnd, feature_variant_covariate_df]=\ - run_structLMM2_int_load_intersect_phenotype_environments_covariates_kinship_sample_mapping(pheno_filename = pheno_filename, - anno_filename = anno_filename, env_filename = env_filename, geno_prefix = geno_prefix, - plinkGenotype = plinkGenotype, cis_mode = cis_mode, interaction_mode = interaction_mode, - relatedness_score = relatedness_score, snps_filename = snps_filename, - feature_filename = feature_filename, snp_feature_filename = snp_feature_filename, - selection = genetic_range, covariates_filename = covariates_filename, - kinship_filename = kinship_filename, sample_mapping_filename = sample_mapping_filename, - feature_variant_covariate_filename = feature_variant_covariate_filename) - - #import pdb; pdb.set_trace() - if(feature_list == None or len(feature_list) == 0): - print ('No features to be tested.') - sys.exit() - - #Arrays to store number of samples and genetic effects - n_samples = [] - n_e_samples = [] - currentFeatureNumber = 0 - pvs = list() - - - for feature_id in tqdm(feature_list): - currentFeatureNumber+= 1 - - snp_list = snp_feature_filter_df['snp_id'].loc[snp_feature_filter_df['feature'] == feature_id] - snpQuery = bim.query('snp in @snp_list') - - if len(snpQuery) != 0: - phenotype_ds = phenotype_df.loc[feature_id] - contains_missing_samples = any(~np.isfinite(phenotype_ds)) - - # import pdb; pdb.set_trace() - - if(contains_missing_samples): - print ('Feature: ' + feature_id + ' contains missing data.') - phenotype_ds.dropna(inplace = 
True) - - '''select indices for relevant individuals in genotype matrix - These are not unique. NOT to be used to access phenotype/covariates data - ''' - individual_ids = sample2individual_df.loc[phenotype_ds.index, 'iid'].values - sample2individual_feature = sample2individual_df.loc[phenotype_ds.index] - - if(contains_missing_samples): - tmp_unique_individuals = genetically_unique_individuals - genetically_unique_individuals = utils.get_unique_genetic_samples(kinship_df.loc[individual_ids, individual_ids], relatedness_score); - - if phenotype_ds.empty or (len(genetically_unique_individuals) < minimum_test_samples) : - print("Feature: " + feature_id + " not tested: not enough samples do QTL test.") - # fail_qc_features.append(feature_id) - if contains_missing_samples: - genetically_unique_individuals = tmp_unique_individuals - continue - elif np.var(phenotype_ds.values) == 0: - print("Feature: " + feature_id + " has no variance in selected individuals.") - # fail_qc_features.append(feature_id) - if contains_missing_samples: - genetically_unique_individuals = tmp_unique_individuals - continue - - - #import pdb; pdb.set_trace() - n_sample_t = (phenotype_ds.size) - n_e_samples_t = (len(genetically_unique_individuals)) - print ('For feature: ' + str(currentFeatureNumber) + '/' +str(len(feature_list)) + ' (' + feature_id + '): ' + str(snpQuery.shape[0]) + ' SNPs need to be tested.\n Please stand by.') - - # import pdb; pdb.set_trace() - - - for snpGroup in utils.chunker(snpQuery, blocksize): - snp_idxs = snpGroup['i'].values - snp_names = snpGroup['snp'].values - - - #subset genotype matrix, we cannot subselect at the same time, do in two steps. - snp_df = pd.DataFrame(data = bed[snp_idxs,:].compute().transpose(), index = fam.index, columns = snp_names) - snp_df = snp_df.loc[individual_ids,:] - - - #We could make use of relatedness when imputing. 
- snp_matrix_DF = pd.DataFrame(fill_NaN.fit_transform(snp_df), index = snp_df.index, columns = snp_df.columns) - snp_df = None - -# test if the environments, covariates, kinship, snp and phenotype are in the same order - if ((all(snp_matrix_DF.index == kinship_df.loc[individual_ids,individual_ids].index) if kinship_df is not None else True) &\ - (all(phenotype_ds.index == covariate_df.loc[sample2individual_feature['sample'],:].index)if covariate_df is not None else True)&\ - all(phenotype_ds.index == environment_df.loc[sample2individual_feature['sample'],:].index)&\ - all(snp_matrix_DF.index == sample2individual_feature.loc[phenotype_ds.index]['iid'])): - ''' - if all lines are in order put in arrays the correct genotype and phenotype - x=a if cond1 else b <---> equivalent to if cond1: x=a else x=b; - better readability of the code - ''' - kinship_mat = kinship_df.loc[individual_ids,individual_ids].values if kinship_df is not None else None - env = environment_df.loc[sample2individual_feature['sample'],:].values - if covariate_df is not None: - cov_matrix = covariate_df.loc[sample2individual_feature['sample'],:].values - else: - cov_matrix = None - - phenotype = utils.force_normal_distribution(phenotype_ds.values, method = gaussianize_method) if gaussianize_method is not None else phenotype_ds.values - else: - print ('There is an issue in mapping phenotypes and genotypes') - sys.exit() - - - n_indep_snps = sum(limix.qc.indep_pairwise(snp_matrix_DF.values, window_size, window_size, threshold, verbose = True)) - print(n_indep_snps) - # loop over SNPs to estimate individual effect sizes - for snp in snp_names: - g = snp_matrix_DF.loc[:,snp].values - g = g.T.reshape(g.shape[0],1) - y = phenotype - y = y.T.reshape(y.shape[0],1) - W = cov_matrix - X = np.concatenate((W, g[:, np.newaxis]), axis = 1) - E = env - # fit null model - slmm_int = StructLMM2(y, W, E) - null = slmm_int.fit_null(g) - _p = slmm_int.score_2_dof(g) - # import pdb; pdb.set_trace() - pvs.append([snp, 
feature_id, _p, _p*n_indep_snps]) - - - pvs_df = pd.DataFrame(pvs, columns = ["snp_id","feature_id","p_value","feature_corrected_p_value"]) - - if not selectionStart is None : - # add if statements - pvs_df.to_csv(output_dir + '/eqtl_gxe_pvalue_{}_{}_{}.txt'.format(chromosome, selectionStart, selectionEnd), sep = '\t', index = True) - else : - # add if statements - pvs_df.to_csv(output_dir + '/eqtl_gxe_pvalue_{}.txt'.format(chromosome), sep = '\t', index = True) - - - -if __name__=='__main__': - - #Variables - chunkFile = '/nfs/leia/research/stegle/mjbonder/ChunkFiles/Ensembl_75_Limix_Annotation_FC_Gene_step10.txt' - genotypeFile = '/hps/nobackup/hipsci/scratch/genotypes/imputed/2017-03-27/Full_Filtered_SNPs_Plink/hipsci.wec.gtarray.HumanCoreExome.imputed_phased.20170327.genotypes.norm.renamed' - annotationFile = '/hps/nobackup/hipsci/scratch/singlecell_endodiff/data_processed/scQTLs/annos/combined_feature_id_annos.tsv' - phenotypeFile = '/nfs/leia/research/stegle/acuomo/singlecell_endodiff/data/exploratory_analysis/input_files/full_sc_exprs_pheno.tsv' - covariateFile = '/nfs/leia/research/stegle/acuomo/singlecell_endodiff/data/exploratory_analysis/input_files/full_sc_exprs_10pcs_covs.tsv' - environmentFile = '/nfs/leia/research/stegle/acuomo/singlecell_endodiff/data/exploratory_analysis/input_files/full_sc_exprs_10pcs_envs.tsv' - kinshipFile = '/hps/nobackup/hipsci/scratch/genotypes/imputed/2017-03-27/Full_Filtered_SNPs_Plink-F/hipsci.wec.gtarray.HumanCoreExome.imputed_phased.20170327.genotypes.norm.renamed.kinship' - sampleMappingFile = '/nfs/leia/research/stegle/acuomo/singlecell_endodiff/data/exploratory_analysis/input_files/full_sc_exprs_samples.tsv' - featureVariantFilter = '/nfs/leia/research/stegle/acuomo/singlecell_endodiff/data/exploratory_analysis/input_files/full_sc_exprs_merged_leads_top10_perstage.tsv' - blockSize = '500' - output_dir = '/nfs/leia/research/stegle/acuomo/singlecell_endodiff/data/pipeline_snakemakes/output_structlmm_alldays_10PCs/' - - 
run_structLMM2_int(phenotypeFile, annotationFile, environmentFile, genotypeFile, - output_dir, sample_mapping_filename = sampleMappingFile, snp_feature_filename = featureVariantFilter, - kinship_filename = kinshipFile, covariates_filename = covariateFile) - - - diff --git a/setup.cfg b/setup.cfg index 461c097..ebfda63 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,55 +6,52 @@ classifiers = License :: OSI Approved :: MIT License Operating System :: OS Independent Programming Language :: Python -description = StructLMM that accounts for population structure. -download_url = https://github.com/limix/struct-lmm2 -keywords = function, optimisation +description = A linear mixed model framework to map multivariate context-specific eQTL using single-cell RNA-seq data. +download_url = https://github.com/limix/CellRegMap +keywords = scRNA-seq, eQTL, context-specific regulation license = MIT long_description = file: README.md long_description_content_type = text/markdown maintainer = Danilo Horta platforms = Windows, MacOS, Linux maintainer_email = horta@ebi.ac.uk -name = struct-lmm2 -url = https://github.com/limix/struct-lmm2 +name = cellregmap +url = https://github.com/limix/CellRegMap version = attr: version.get [options] zip_safe = True include_package_data = True packages = find: -setup_requires = - pytest-runner>=4.2 +python_requires = >= 3.6 install_requires = - brent-search>=1.0.33 - cachetools>=2.1.0 - liknorm>=1.2.1 - numpy-sugar>=1.4.1 - numpy>=1.14.3 - pandas>=0.23.1 - pytest>=3.6.3 - pytest-doctestplus>=0.3.0 - scipy>=1.0.1 - chiscore>=0.2.0 - struct-lmm>=0.3.1 + chiscore>=0.2.3 + glimix-core>=3.1.12 + numpy-sugar>=1.5.1 + numpy>=1.17.5 + pytest>=5.4.3 + scipy>=1.2.3 + tqdm>=4.41.1 [aliases] test = pytest [tool:pytest] addopts = - --doctest-plus --doctest-modules --doctest-glob='*.rst' --ignore="setup.py" - --ignore="doc/conf.py" doctest_plus = enabled -doctest_optionflags = NORMALIZE_WHITESPACE 
IGNORE_EXCEPTION_DETAIL ELLIPSIS ALLOW_UNICODE FLOAT_CMP -doctest_plus_atol = 1e-03 -doctest_plus_rtol = 1e-03 +doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS ALLOW_UNICODE doctest_rst = enabled -norecursedirs = .eggs .git *.egg-info build .ropeproject .undodir -pep8ignore = E402 W0212 W0622 R0915 +norecursedirs = .eggs .git *.egg-info build .ropeproject .undodir old_files + +[flake8] +ignore = E501,E741,E203,W503,W0212,W0622,R0915,E743 + +[pycodestyle] +ignore = E741,E743,E203 +max-line-length = 88 [tool:isort] multi_line_output=3 @@ -66,10 +63,6 @@ line_length=88 [pylint] disable = redefined-builtin,R0915 -[flake8] -ignore = E501 E741 E203 W503 W0212 W0622 R0915 -max-line-length = 88 - [rstcheck] ignore_substitutions = today, version ignore_directives = plot, autofunction, command-output, autmodule, automodule, autoclass, autoattribute, automethod, doctest diff --git a/setup.py b/setup.py index f4ebca1..2553605 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,6 @@ -from setuptools import setup +#!/usr/bin/env python +from setuptools import setup -setup( - name="struct-lmm2", - maintainer="Anna", - description="Nice package", - author="Anna", - license="BSD", - python_requires=">=3.6", - install_requires=["numpy>=1.6", "scipy", "glimix_core", "numpy-sugar", "chiscore"], -) +if __name__ == "__main__": + setup() diff --git a/simulations1.py b/simulations1.py deleted file mode 100644 index d8734d3..0000000 --- a/simulations1.py +++ /dev/null @@ -1,506 +0,0 @@ -import sys - -import numpy as np -from numpy import ( - asarray, - concatenate, - empty, - eye, - inf, - newaxis, - ones, - sqrt, - stack, - trace, - where, - zeros, -) -from numpy.linalg import eigvalsh, inv, solve -from numpy.random import RandomState -from numpy_sugar.linalg import ddot, economic_qs, economic_svd -from scipy.linalg import sqrtm - -from chiscore import davies_pvalue, optimal_davies_pvalue -from glimix_core.lmm import LMM -from struct_lmm import StructLMM - - -""" 
sample phenotype from the model: - - 𝐲 = W𝛂 + 𝐠𝛽 + 𝐠⊙𝛃 + 𝐞 + u + 𝛆 - - 𝛃 ∼ 𝓝(𝟎, b²Σ) - 𝐞 ∼ 𝓝(𝟎, e²Σ) - 𝛆 ∼ 𝓝(𝟎, 𝜀²I) - Σ = EEᵀ - u ~ 𝓝(𝟎, g²K) - -""" - - -def _mod_liu(q, w): - from chiscore import liu_sf - - (pv, dof_x, _, info) = liu_sf(q, w, [1] * len(w), [0] * len(w), True) - return (pv, info["mu_q"], info["sigma_q"], dof_x) - - -def _qmin(pliumod): - from numpy import zeros - import scipy.stats as st - - # T statistic - T = pliumod[:, 0].min() - - qmin = zeros(pliumod.shape[0]) - percentile = 1 - T - for i in range(pliumod.shape[0]): - q = st.chi2.ppf(percentile, pliumod[i, 3]) - mu_q = pliumod[i, 1] - sigma_q = pliumod[i, 2] - dof = pliumod[i, 3] - qmin[i] = (q - dof) / (2 * dof) ** 0.5 * sigma_q + mu_q - - return qmin - - -# Let Σ = 𝙴𝙴ᵀ -# 𝐲 ∼ 𝓝(𝙼𝛂, 𝓋₀𝙳(ρ𝟏𝟏ᵀ + (1-ρ)Σ)𝙳 + 𝓋₁(aΣ + (1-a)𝙺) + 𝓋₂𝙸). - -seed = int(sys.argv[1]) -random = RandomState(seed) # set a seed to replicate simulations -# set sample size -n_samples = 500 -# simulate MAF (minor allele frequency) distribution -maf_min = 0.05 -maf_max = 0.45 -n_snps = 20 - -print(n_samples, "samples") -print(n_snps, "snps") -print(maf_min, "min MAF") -print(maf_max, "max MAF") - -"simulate environments" - -# two groups -group_size = n_samples // 2 - -E = zeros((n_samples, 2)) - -E[:group_size, 0] = 1 -E[group_size:, 1] = 1 - -Sigma = E @ E.T - -# import pdb; pdb.set_trace() - -"simulate genotypes (for n_snps variants)" - - -# Simulate genotypes (for n_snps variants) -mafs = random.rand(n_snps) * (maf_max - maf_min) + maf_min - -# simulate SNPs accordingly -G = [] - -for maf in mafs: - g = random.choice( - [0, 1, 2], - p=[(1 - maf) ** 2, 1 - ((1 - maf) ** 2 + maf ** 2), maf ** 2], - size=n_samples, - ) - G.append(asarray(g, float)) - -# We normalize it such that the expectation of 𝔼[𝐠ᵀ𝐠] = 1. -# i.e. 
normalize columns - -G = stack(G, axis=1) -G -= G.mean(0) -G /= G.std(0) - -G0 = G.copy() -G0 /= sqrt(G0.shape[1]) -K = G0 @ G0.T - - -"simulate two SNPs to have persistent effects and two to have interaction effects" -"one SNP in common, one unique to each category" - -idxs_persistent = [5, 6] -idxs_gxe = [10, 11] - -print("MAFs of causal SNPs") - -print("{}\t{}".format(idxs_persistent[0], mafs[idxs_persistent[0]])) -print("{}\t{}".format(idxs_persistent[1], mafs[idxs_persistent[1]])) -print("{}\t{}".format(idxs_gxe[0], mafs[idxs_gxe[0]])) -print("{}\t{}".format(idxs_gxe[1], mafs[idxs_gxe[1]])) - -# idxs_persistent = [5, 30] -# idxs_gxe = [30, 45] - -# Variances -# -# 𝐲 ∼ 𝓝(1 + ∑ᵢ𝐠ᵢ𝛽_gᵢ, σ²_g⋅𝙳𝟏𝟏ᵀ𝙳 + σ²_gxe⋅𝙳Σ𝙳𝙳 + σ²_e⋅Σ + σ²_k⋅𝙺 + σ²_n⋅𝙸. -# σ²_g + σ²_gxe + σ²_e + σ²_k + σ²_n = 1 -# σ²₁*ρ + σ²₁*(1-ρ) + σ²_e + σ²_k + σ²_n = 1 - -# The user will provide: σ²₁, ρ -# And we assume that σ²_e = σ²_k = σ²_n = v -# v = (1 - σ²₁*ρ + σ²₁*(1-ρ)) / 3 -# σ²_e = a*σ²₂ -# σ²_k = (1-a)*σ²₂ - - -"simulate sigma parameters" - -rho = 0.8 # contribution of interactions (proportion) -var_tot_g_gxe = 0.4 - -print(rho, "rho (prop var explained by GxE)") -print(var_tot_g_gxe, "tot variance G + GxE") - -var_tot_g = (1 - rho) * var_tot_g_gxe -var_tot_gxe = rho * var_tot_g_gxe - -var_g = var_tot_g / len(idxs_persistent) # split effect across n signals -var_gxe = var_tot_gxe / len(idxs_gxe) - -v = (1 - var_tot_gxe - var_tot_g) / 3 -var_e = v # environment effect only -var_k = v # population structure effect ? -var_noise = v -# print(v) - -""" (persistent) genotype portion of phenotype: - - 𝐲_g = G 𝛃_g - - 𝐲_g = ∑ᵢ𝐠ᵢ𝛽_gᵢ, - - where 𝐠ᵢ is the i-th column of 𝙶. 
- -""" - - -# simulate (persistent) beta to have causal SNPs as defined -beta_g = zeros(n_snps) -beta_g[idxs_persistent] = random.choice([+1, -1], size=len(idxs_persistent)) -beta_g /= beta_g.std() -beta_g *= var_g - -"calculate genoytpe component of y" - -y_g = G @ beta_g - - -""" GxE portion of phenotype: - - 𝐲_gxe = ∑ᵢ gᵢ x 𝛃ᵢ - -""" -# simulate (GxE) variance component to have causal SNPs as defined -sigma_gxe = zeros(n_snps) -sigma_gxe[idxs_gxe] = var_gxe - -# for i in range(n_snps): -# print('{}\t{}'.format(i,sigma_gxe[i])) - -y_gxe = zeros(n_samples) -u_gxe = ones(n_samples) -u_gxe[group_size:] = -1 - -for i in range(n_snps): - # beta_gxe = random.multivariate_normal(zeros(n_samples), sigma_gxe[i] * Sigma) - beta_gxe = sigma_gxe[i] * u_gxe - y_gxe += G[:, i] * beta_gxe - - -e = random.multivariate_normal(zeros(n_samples), v * Sigma) -u = random.multivariate_normal(zeros(n_samples), v * K) -eps = random.multivariate_normal(zeros(n_samples), v * eye(n_samples)) - -e0 = random.multivariate_normal(zeros(n_samples), v * 3 / 2 * Sigma) -eps0 = random.multivariate_normal(zeros(n_samples), v * 3 / 2 * eye(n_samples)) - - -"sum all parts of y" -y = 1 + y_g + y_gxe + e + u + eps -y0 = 1 + y_g + y_gxe + e0 + eps0 - - -p_values0 = [] -p_values1 = [] -p_values2 = [] -p_values3 = [] - -print("testing using standard structLMM") - -"test using struct LMM (standard)" - - -# y = y.reshape(y.shape[0], 1) - -# "Association test" - -print( - "p-values of association test SNPs", - idxs_persistent, - idxs_gxe, - "should be causal (persistent + GxE)", -) -slmm = StructLMM(y0, M=np.ones(n_samples), E=E, W=E) -slmm.fit(verbose=False) - -for i in range(n_snps): - g = G[:, i] - g = g.reshape(g.shape[0], 1) - _p = slmm.score_2dof_assoc(g) - print("{}\t{}".format(i, _p)) - p_values0.append(_p) - -## "Interaction test" - -print("p-values of interaction test SNPs", idxs_gxe, "should be causal (GxE)") - -for i in range(n_snps): - g = G[:, i] - # g = g.reshape(g.shape[0],1) - M = 
np.ones(n_samples) - M = np.stack([M, g], axis=1) - slmm_int = StructLMM(y0, M=M, E=E, W=E) - slmm_int.fit(verbose=False) - _p = slmm_int.score_2dof_inter(g) - print("{}\t{}".format(i, _p)) - p_values1.append(_p) - - -################################################# -################################################# -################################################# -################################################# - -# print("using structLMM 2 now") - -# "test using struct LMM 2 (in this case it should not be very different)" - -## y = y.reshape(y.shape[0], 1) - -Cov = {} -QS_a = {} -M = ones((n_samples, 1)) - -# a_values = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1] -a_values = [1] - -for a in a_values: - Cov[a] = a * Sigma + (1 - a) * K - QS_a[a] = economic_qs(Cov[a]) - -# "Association test" - -print( - "p-values of association test SNPs", - idxs_persistent, - idxs_gxe, - "should be causal (persistent + GxE)", -) - -rhos = [0.0, 0.1 ** 2, 0.2 ** 2, 0.3 ** 2, 0.4 ** 2, 0.5 ** 2, 0.5, 0.999] - -for i in range(n_snps): - # print(i) - g = G[:, i] - g = g.reshape(g.shape[0], 1) - best = {"lml": -inf, "a": 0, "v0": 0, "v1": 0, "beta": 0} - for a in a_values: - M = np.ones(n_samples) - lmm = LMM(y0, M, QS_a[a], restricted=True) # cov(y) = v0*(aΣ + (1-a)K) + v1*Is - lmm.fit(verbose=False) - if lmm.lml() > best["lml"]: - best["lml"] = lmm.lml() - best["a"] = a - best["v0"] = lmm.v0 - best["v1"] = lmm.v1 - best["alpha"] = lmm.beta - - "H0 optimal parameters" - alpha = lmm.beta[:-1] - beta = lmm.beta[-1] - # e²Σ + g²K = s²(aΣ + (1-a)K) - # e² = s²*a - # g² = s²*(1-a) - s2 = lmm.v0 # s² - eps2 = lmm.v1 # 𝜀² - - "H1 via score test" - # Let K₀ = g²K + e²Σ + 𝜀²I - # with optimal values e² and 𝜀² found above. - K0 = lmm.covariance() - X = concatenate((E, g), axis=1) - X = M.reshape((-1, 1)) - - # import pdb; pdb.set_trace() - # Let P₀ = K⁻¹ - K₀⁻¹X(XᵀK₀⁻¹X)⁻¹XᵀK₀⁻¹. 
- K0iX = solve(K0, X) - P0 = inv(K0) - K0iX @ solve(X.T @ K0iX, K0iX.T) - - # P₀𝐲 = K⁻¹𝐲 - K₀⁻¹X(XᵀK₀⁻¹X)⁻¹XᵀK₀⁻¹𝐲. - K0iy = solve(K0, y0) - P0y = K0iy - solve(K0, X @ solve(X.T @ K0iX, X.T @ K0iy)) - # self._P(self._y).min() - # -4.90270502291706 - - # import pdb; pdb.set_trace() - # The covariance matrix of H1 is K = K₀ + b²diag(𝐠)⋅Σ⋅diag(𝐠) - # We have ∂K/∂b² = diag(𝐠)⋅Σ⋅diag(𝐠) - # The score test statistics is given by - # Q = ½𝐲ᵀP₀⋅∂K⋅P₀𝐲 - - dK_G = ddot(g.ravel(), ddot(ones((n_samples, n_samples)), g.ravel())) - dK_GxE = ddot(g.ravel(), ddot(Sigma, g.ravel())) - # P0 = P0 + 1e-9 * eye(P0.shape[0]) - # sqrP0 = sqrtm(P0) - # import pdb; pdb.set_trace() - Q_G = P0y.T @ dK_G @ P0y - Q_GxE = P0y.T @ dK_GxE @ P0y - - # the eigenvalues of ½P₀⋅∂K⋅P₀ - # are tge eigenvalues of - gPg = g.T @ P0 @ g - goE = g * E - gPgoE = g.T @ P0 @ goE - gEPgE = goE.T @ P0 @ goE - - nE = E.shape[1] + 1 - F = empty((nE, nE)) - - # lambdas = zeros(len(rhos)) - lambdas = [] - Q = [] - for rho in rhos: - # print(ii) - # print(rho) - Q.append((rho * Q_G + (1 - rho) * Q_GxE) / 2) - # dK = rho * dK_GxE + (1 - rho) * dK_G - # lambdas.append(eigvalsh((sqrP0 @ dK @ sqrP0) / 2)) - # lambdas[ii] = eigvalsh(sqrP0 @ dK @ sqrP0) / 2 - F[0, 0] = rho * gPg - F[0, 1:] = sqrt(rho) * sqrt(1 - rho) * gPgoE - F[1:, 0] = F[0, 1:] - F[1:, 1:] = (1 - rho) * gEPgE - lambdas.append(eigvalsh(F) / 2) - - pliumod = stack([_mod_liu(Qi, lam) for Qi, lam in zip(Q, lambdas)], axis=0) - qmin = _qmin(pliumod) - - # 3. 
Calculate quantites that occur in null distribution - Px1 = P0 @ g - m = 0.5 * (g.T @ Px1) - goE = g * E - PgoE = P0 @ goE - ETxPxE = 0.5 * (goE.T @ PgoE) - ETxPx1 = goE.T @ Px1 - ETxPx11xPxE = 0.25 / m * (ETxPx1 @ ETxPx1.T) - ZTIminusMZ = ETxPxE - ETxPx11xPxE - eigh = eigvalsh(ZTIminusMZ) - - eta = ETxPx11xPxE @ ZTIminusMZ - vareta = 4 * trace(eta) - - OneZTZE = 0.5 * (g.T @ PgoE) - tau_top = OneZTZE @ OneZTZE.T - tau_rho = empty(len(rhos)) - - for ii in range(len(rhos)): - # print(ii) - # tau_rho[ii] = rhos[ii] * m + (1 - rhos[ii]) / m * tau_top - tau_rho[ii] = (1 - rhos[ii]) * m + (rhos[ii]) / m * tau_top - - MuQ = sum(eigh) - VarQ = sum(eigh ** 2) * 2 + vareta - KerQ = sum(eigh ** 4) / (sum(eigh ** 2) ** 2) * 12 - Df = 12 / KerQ - - # 4. Integration - T = pliumod[:, 0].min() - pvalue = optimal_davies_pvalue( - qmin, MuQ, VarQ, KerQ, eigh, vareta, Df, tau_rho, rhos, T - ) - - # Final correction to make sure that the p-value returned is sensible - multi = 3 - if len(rhos) < 3: - multi = 2 - idx = where(pliumod[:, 0] > 0)[0] - pval = pliumod[:, 0].min() * multi - if pvalue <= 0 or len(idx) < len(rhos): - pvalue = pval - if pvalue == 0: - if len(idx) > 0: - pvalue = pliumod[:, 0][idx].min() - - print("{}\t{}".format(i, pvalue)) - p_values2.append(pvalue) - # return pvalue - -# sys.exit(0) -"Interaction test" - - -print("p-values of interaction test SNPs", idxs_gxe, "should be causal (GxE)") - -for i in range(n_snps): - g = G[:, i] - g = g.reshape(g.shape[0], 1) - M = np.ones((n_samples, 1)) - Mg = concatenate((M, g), axis=1) - best = {"lml": -inf, "a": 0, "v0": 0, "v1": 0, "beta": 0} - for a in a_values: - lmm = LMM(y0, Mg, QS_a[a], restricted=True) # cov(y) = v0*(aΣ + (1-a)K) + v1*Is - lmm.fit(verbose=False) - if lmm.lml() > best["lml"]: - best["lml"] = lmm.lml() - best["a"] = a - best["v0"] = lmm.v0 - best["v1"] = lmm.v1 - best["alpha"] = lmm.beta - - "H0 optimal parameters" - alpha = lmm.beta[:-1] - beta = lmm.beta[-1] - # e²Σ + g²K = s²(aΣ + (1-a)K) - # e² = 
s²*a - # g² = s²*(1-a) - s2 = lmm.v0 # s² - eps2 = lmm.v1 # 𝜀² - - "H1 via score test" - # Let K₀ = g²K + e²Σ + 𝜀²I - # with optimal values e² and 𝜀² found above. - K0 = lmm.covariance() - X = concatenate((E, g), axis=1) - - # Let P₀ = K⁻¹ - K₀⁻¹X(XᵀK₀⁻¹X)⁻¹XᵀK₀⁻¹. - K0iX = solve(K0, X) - P0 = inv(K0) - K0iX @ solve(X.T @ K0iX, K0iX.T) - - # P₀𝐲 = K⁻¹𝐲 - K₀⁻¹X(XᵀK₀⁻¹X)⁻¹XᵀK₀⁻¹𝐲. - K0iy = solve(K0, y0) - P0y = K0iy - solve(K0, X @ solve(X.T @ K0iX, X.T @ K0iy)) - - # The covariance matrix of H1 is K = K₀ + b²diag(𝐠)⋅Σ⋅diag(𝐠) - # We have ∂K/∂b² = diag(𝐠)⋅Σ⋅diag(𝐠) - # The score test statistics is given by - # Q = ½𝐲ᵀP₀⋅∂K⋅P₀𝐲 - dK = ddot(g.ravel(), ddot(Sigma, g.ravel())) - Q = (P0y.T @ dK @ P0y) / 2 - - # Q is the score statistic for our interaction test and follows a linear combination - # of chi-squared (df=1) distributions: - # Q ∼ ∑λχ², where λᵢ are the non-zero eigenvalues of ½√P₀⋅∂K⋅√P₀. - sqrP0 = sqrtm(P0) - pval = davies_pvalue(Q, (sqrP0 @ dK @ sqrP0) / 2) - print("{}\t{}".format(i, pval)) - p_values3.append(pval) diff --git a/struct_lmm2/__init__.py b/struct_lmm2/__init__.py deleted file mode 100644 index 5a07dae..0000000 --- a/struct_lmm2/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .struct_lmm2 import StructLMM2 - -__version__ = "0.0.1" diff --git a/struct_lmm2/struct_lmm2.py b/struct_lmm2/struct_lmm2.py deleted file mode 100644 index aecd737..0000000 --- a/struct_lmm2/struct_lmm2.py +++ /dev/null @@ -1,178 +0,0 @@ -import numpy as np -from numpy import concatenate, inf, newaxis -from numpy.linalg import eigvalsh, inv, solve -from numpy.random import RandomState -from numpy_sugar.linalg import ddot, economic_qs, economic_svd -from scipy.linalg import sqrtm - -from chiscore import davies_pvalue, optimal_davies_pvalue -from glimix_core.lmm import LMM - - -class StructLMM2: - r""" - Mixed-model with genetic effect heterogeneity. 
- - The extended StructLMM model (two random effects) is - - 𝐲 = 𝙼𝛂 + 𝐠𝛽 + 𝐠⊙𝛃 + 𝐞 + 𝐮 + 𝛆, (1) - - where - - (𝐠⊙𝛃)ᵢ = 𝐠ᵢ𝛃ᵢ - 𝛽 ∼ 𝓝(0, 𝓋₀ρ₀), - 𝛃 ∼ 𝓝(𝟎, 𝓋₀(1-ρ₀)𝙴𝙴ᵀ), - 𝐞 ∼ 𝓝(𝟎, 𝓋₁ρ₁𝚆𝚆ᵀ), - 𝐮 ~ 𝓝(𝟎, 𝓋₁(1-ρ₁)𝙺), and - 𝛆 ∼ 𝓝(𝟎, 𝓋₂𝙸). - - The matrices 𝙴 and 𝚆 are generally the same, and represent the environment - configuration for each sample. - The parameter ρ ∈ [𝟶, 𝟷] dictates the relevance of genotype-environment interaction - versus the genotype effect alone. - The term 𝐞 accounts for additive environment-only effects while 𝛆 accounts for - noise effects. - The term 𝐮 accounts for population structure. - - The above model is equivalent to - - 𝐲 = 𝙼𝛂 + 𝐠⊙𝛃 + 𝐞 + 𝐮 + 𝛆, (2) - - where - - 𝛃 ∼ 𝓝(𝟎, 𝓋₀(ρ₀𝟏𝟏ᵀ + (1-ρ₀)𝙴𝙴ᵀ)), - 𝐞 ∼ 𝓝(𝟎, 𝓋₁ρ₁𝚆𝚆ᵀ), - 𝐮 ~ 𝓝(𝟎, 𝓋₁(1-ρ₁)𝙺), and - 𝛆 ∼ 𝓝(𝟎, 𝓋₂𝙸). - - Notice that the 𝛃 in Eqs. (1) and (2) are not the same. - Its marginalised form is given by - - 𝐲 ∼ 𝓝(𝙼𝛂, 𝓋₀𝙳(ρ₀𝟏𝟏ᵀ + (1-ρ₀)𝙴𝙴ᵀ)𝙳 + 𝓋₁(ρ₁𝚆𝚆ᵀ + (1-ρ₁)𝙺) + 𝓋₂𝙸), - - where 𝙳 = diag(𝐠). - - StructLMM method is used to perform two types of statistical tests. - The association one compares the following hypotheses: - - 𝓗₀: 𝓋₀ = 0 - 𝓗₁: 𝓋₀ > 0 - - 𝓗₀ denotes no genetic association, while 𝓗₁ models any genetic association. - In particular, 𝓗₁ includes genotype-environment interaction as part of genetic - association. - The interaction test is slightly more complicated as the term 𝐠𝛽 in Eq. (1) is now - considered a fixed one. - In pratice, however, we instead include 𝐠 in the covariates matrix 𝙼 and set ρ = 0 - in Eq. (2). - We refer to this modified model as the interaction model. - The compared hypotheses are: - - 𝓗₀: 𝓋₀ = 0 (given the interaction model) - 𝓗₁: 𝓋₀ > 0 (given the interaction model) - - Implementation - -------------- - - Let Σ₀ = 𝙴𝙴ᵀ and Σ₁ = 𝚆𝚆ᵀ. - Therefore, - - 𝐲 ∼ 𝓝(𝙼𝛂, 𝓋₀𝙳(ρ₀𝟏𝟏ᵀ + (1-ρ₀)Σ₀)𝙳 + 𝓋₁(ρ₁Σ₁ᵀ + (1-ρ₁)𝙺) + 𝓋₂𝙸). 
- - Let C(ρ₁) = - """ - - def __init__(self, y, M, E, W=None, K=None): - - # The above model is equivalent to - - # 𝐲 = 𝙼𝛂 + 𝐠⊙𝛃 + 𝐞 + 𝐮 + 𝛆, (2) - - # where - - # 𝛃 ∼ 𝓝(𝟎, 𝓋₀(ρ𝟏𝟏ᵀ + (1-ρ)𝙴𝙴ᵀ)), - # 𝐞 ∼ 𝓝(𝟎, 𝓋₁𝚆𝚆ᵀ), - # 𝐮 ~ 𝓝(𝟎, g²𝙺), and - # 𝛆 ∼ 𝓝(𝟎, 𝓋₂𝙸). - if W is None: - W = E - - self._y = y - self._M = M - self._E = E - self._W = W - self._K = K - self._Sigma0 = E @ E.T - self._Sigma1 = W @ W.T - - self._rho0 = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1] - self._rho1 = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1] - - self.Cov = {} - self.QS_a = {} - for a in self.a_values: - self.Cov[a] = a * self.Sigma + (1 - a) * self.K - self.QS_a[a] = economic_qs(self.Cov[a]) - - def fit_null(self, g): - self.X = concatenate((self.W, g[:, newaxis]), axis=1) - best = {"lml": -inf, "a": 0, "v0": 0, "v1": 0, "beta": 0} - for a in self.a_values: - # cov(y) = v0*(aΣ + (1-a)K) + v1*I - lmm = LMM(self.y, self.X, self.QS_a[a], restricted=True) - lmm.fit(verbose=False) - if lmm.lml() > best["lml"]: - best["lml"] = lmm.lml() - best["a"] = a - best["v0"] = lmm.v0 - best["v1"] = lmm.v1 - best["alpha"] = lmm.beta - best["covariance"] = lmm.covariance() - self.best = best - - def score_2_dof(self, g): - alpha = self.best["alpha"][:-1] - beta = self.best["alpha"][-1] - # e²Σ + g²K = s²(aΣ + (1-a)K) - # e² = s²*a - # g² = s²*(1-a) - s2 = self.best["v0"] # s² - eps2 = self.best["v1"] # 𝜀² - - # H1 via score test - # Let K₀ = g²K + e²Σ + 𝜀²I - # with optimal values e² and 𝜀² found above. - K0 = self.best["covariance"] - - # Let P₀ = K⁻¹ - K₀⁻¹X(XᵀK₀⁻¹X)⁻¹XᵀK₀⁻¹. - K0iX = solve(K0, self.X) - P0 = inv(K0) - K0iX @ solve(self.X.T @ K0iX, K0iX.T) - - # P₀𝐲 = K⁻¹𝐲 - K₀⁻¹X(XᵀK₀⁻¹X)⁻¹XᵀK₀⁻¹𝐲. 
- K0iy = solve(K0, self.y) - P0y = K0iy - solve(K0, self.X @ solve(self.X.T @ K0iX, self.X.T @ K0iy)) - - # The covariance matrix of H1 is K = K₀ + b²diag(𝐠)⋅Σ⋅diag(𝐠) - # We have ∂K/∂b² = diag(𝐠)⋅Σ⋅diag(𝐠) - # The score test statistics is given by - # Q = ½𝐲ᵀP₀⋅∂K⋅P₀𝐲 - dK = ddot(g, ddot(self.Sigma, g)) - Q = (P0y.T @ dK @ P0y) / 2 - - # Q is the score statistic for our interaction test and follows a linear combination - # of chi-squared (df=1) distributions: - # Q ∼ ∑λχ², where λᵢ are the non-zero eigenvalues of ½√P₀⋅∂K⋅√P₀. - sqrP0 = sqrtm(P0) - # lambdas = eigvalsh((sqrP0 @ dK @ sqrP0) / 2) - # lambdas = lambdas[lambdas > epsilon.small] - # print(lambdas) - # print(Q) - pval = davies_pvalue(Q, (sqrP0 @ dK @ sqrP0) / 2) - return pval - - -def _mod_liu(q, w): - from chiscore import liu_sf - - (pv, dof_x, _, info) = liu_sf(q, w, [1] * len(w), [0] * len(w), True) - return (pv, info["mu_q"], info["sigma_q"], dof_x) diff --git a/struct_lmm2/test/test_struct_lmm2.py b/struct_lmm2/test/test_struct_lmm2.py deleted file mode 100644 index 193fc68..0000000 --- a/struct_lmm2/test/test_struct_lmm2.py +++ /dev/null @@ -1,41 +0,0 @@ -import numpy as np -import scipy.stats as stats -from numpy import ones -from numpy.random import RandomState -from numpy.testing import assert_allclose - -from struct_lmm import StructLMM -from struct_lmm2 import StructLMM2 - - -def test_structlmm_int_2kinships(): - random = RandomState(0) - n = 100 - c = 2 - y = random.randn(n) - M = ones((n, 1)) - W = random.randn(n, c) - E = random.randn(n, 4) - - p_values1 = [] - p_values2 = [] - - for i in range(10): - random = RandomState(i) - g = random.randn(n) - # X = np.concatenate((W, g[:, np.newaxis]), axis=1) - lmm2 = StructLMM2(y, W, E) - lmm2.fit_null(g) - _p = lmm2.score_2_dof(g) - p_values2.append(_p) - - y = y.reshape(y.shape[0], 1) - lmm = StructLMM(y, M, E) - g = g.reshape(g.shape[0], 1) - covs1 = np.hstack((W, g)) - lmm.fit(covs1) - _p = lmm.score_2dof_inter(g) - p_values1.append(_p) - - 
print(stats.pearsonr(p_values2, p_values1)[0]) - assert_allclose(p_values2, p_values1, rtol=1e-4) diff --git a/struct_lmm_int_2kinships.py b/struct_lmm_int_2kinships.py deleted file mode 100644 index 00b1cbd..0000000 --- a/struct_lmm_int_2kinships.py +++ /dev/null @@ -1,160 +0,0 @@ -""" -Mixed-model with genetic effect heterogeneity. - -The extended StructLMM model (two random effects) is :: - - 𝐲 = W𝛂 + 𝐠𝛽 + 𝐠⊙𝛃 + 𝐞 + u + 𝛆, - -where :: - - 𝐠⊙𝛃 = ∑ᵢ𝐠ᵢ𝛽ᵢ - 𝛃 ∼ 𝓝(𝟎, b²Σ) - 𝐞 ∼ 𝓝(𝟎, e²Σ) - 𝛆 ∼ 𝓝(𝟎, 𝜀²I) - Σ = EEᵀ - u ~ 𝓝(𝟎, g²K) - -If one considers 𝛽 ∼ 𝓝(0, p²), we can insert -𝛽 into 𝛃 :: - - 𝛃_ ∼ 𝓝(𝟎, p²𝟏𝟏ᵀ + b²Σ) - -""" -import numpy as np -from numpy import concatenate, inf, newaxis -from numpy.linalg import eigvalsh, inv, solve -from numpy.random import RandomState -from numpy_sugar import epsilon -from numpy_sugar.linalg import ddot, economic_qs, economic_svd -from scipy.linalg import sqrtm -from chiscore import davies_pvalue, mod_liu, optimal_davies_pvalue - -from glimix_core.lmm import LMM - -from time import time -start = time() - -random = RandomState(0) -n = 10 -c = 2 -y = random.randn(n) -W = random.randn(n, c) -# g = random.randn(n) -E = random.randn(n, 4) -Sigma = E @ E.T -X = random.randn(n, 5) -K = X @ X.T - -# print() - -# 𝐲 = W𝛂 + 𝐠𝛽 + 𝐠⊙𝛃 + 𝐞 + 𝛆, -# 𝐞 ∼ 𝓝(𝟎, e²Σ) -# Σ = EEᵀ -# QS = economic_qs(Sigma) - -# 𝐲 = W𝛂 + 𝐞 + 𝛆 -# 𝓝(𝐲 | W𝛂, e²Σ + 𝜀²I) - -# precompute weighted sum of Sigma and K for set values of a -Cov = {} -QS_a = {} - -a_values = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1] - -for a in a_values: - Cov[a] = a * Sigma + (1 - a) * K - QS_a[a] = economic_qs(Cov[a]) - # print(QS_a[a][0][1].shape[1]) - - -# print(time() - start) -""" -Interaction test ----------------- -H0: b² = 0 => 𝐲 = W𝛂 + 𝐠𝛽 + 𝐞 + u + 𝛆 - 𝐲 ∼ 𝓝(W𝛂 + 𝐠𝛽, e²Σ + g²K + 𝜀²I) -H1: b² > 0 => 𝐲 = W𝛂 + 𝐠𝛽 + 𝐠⊙𝛃 + 𝐞 + u + 𝛆 - 𝐲 ∼ 𝓝(W𝛂 + 𝐠𝛽, e²Σ + g²K + 𝜀²I + b²Σ) -""" -start = time() -for i in range(20): - print(i) - random = RandomState(i) - g = random.randn(n) - X = 
concatenate((W, g[:, newaxis]), axis = 1) - # X_SVD = economic_svd(X) - best = {"lml": -inf, "a": 0, "v0": 0, "v1": 0, "beta": 0} - for a in a_values: - # cov(y) = v0*(aΣ + (1-a)K) + v1*I - lmm = LMM(y, X, QS_a[a], restricted = True) - lmm.fit(verbose = False) - if lmm.lml() > best["lml"]: - best["lml"] = lmm.lml() - best["a"] = a - best["v0"] = lmm.v0 - best["v1"] = lmm.v1 - best["alpha"] = lmm.beta - - # The way LMM represents: 𝓝(y|Xb, scale * ((1-δ)K + δI)) - # lmm.delta = 0.1 - # lmm.scale = 3.4 - # lmm.fix("scale") - # lmm.fix("delta") - -# H0 optimal parameters - alpha = lmm.beta[:-1] - beta = lmm.beta[-1] - # e²Σ + g²K = s²(aΣ + (1-a)K) - # e² = s²*a - # g² = s²*(1-a) - s2 = lmm.v0 # s² - eps2 = lmm.v1 # 𝜀² - - # H1 via score test - # Let K₀ = g²K + e²Σ + 𝜀²I - # with optimal values e² and 𝜀² found above. - K0 = lmm.covariance() - - # Let P₀ = K⁻¹ - K₀⁻¹X(XᵀK₀⁻¹X)⁻¹XᵀK₀⁻¹. - K0iX = solve(K0, X) - P0 = inv(K0) - K0iX @ solve(X.T @ K0iX, K0iX.T) - - # P₀𝐲 = K⁻¹𝐲 - K₀⁻¹X(XᵀK₀⁻¹X)⁻¹XᵀK₀⁻¹𝐲. - K0iy = solve(K0, y) - P0y = K0iy - solve(K0, X @ solve(X.T @ K0iX, X.T @ K0iy)) - - # The covariance matrix of H1 is K = K₀ + b²diag(𝐠)⋅Σ⋅diag(𝐠) - # We have ∂K/∂b² = diag(𝐠)⋅Σ⋅diag(𝐠) - # The score test statistics is given by - # Q = ½𝐲ᵀP₀⋅∂K⋅P₀𝐲 - dK = ddot(g, ddot(Sigma, g)) - Q = (P0y.T @ dK @ P0y) / 2 - - # Q is the score statistic for our interaction test and follows a linear combination - # of chi-squared (df=1) distributions: - # Q ∼ ∑λχ², where λᵢ are the non-zero eigenvalues of ½√P₀⋅∂K⋅√P₀. 
- sqrP0 = sqrtm(P0) - # lambdas = eigvalsh((sqrP0 @ dK @ sqrP0) / 2) - # lambdas = lambdas[lambdas > epsilon.small] - # print(lambdas) - # print(Q) - pval = davies_pvalue(Q, (sqrP0 @ dK @ sqrP0) / 2) - print(pval) - # p_values2[i] = pval - - -# print(time() - start) -# print((time() - start)/100) - - # # compare to StructLMM (int) - - from struct_lmm import StructLMM - y = y.reshape(y.shape[0],1) - slmm_int = StructLMM(y, E, W = E, rho_list = [0]) - g = g.reshape(g.shape[0],1) - covs1 = np.hstack((W, g)) - null = slmm_int.fit_null(F = covs1, verbose = False) - _p = slmm_int.score_2_dof(g) - # p_values1[i] = _p - print(_p) - diff --git a/version.py b/version.py new file mode 100644 index 0000000..a3bbb9e --- /dev/null +++ b/version.py @@ -0,0 +1,17 @@ +import re +from os.path import join + +from setuptools import find_packages + + +def get(): + pkgnames = find_packages() + if len(pkgnames) == 0: + return "unknown" + pkgname = pkgnames[0] + content = open(join(pkgname, "__init__.py")).read() + c = re.compile(r"__version__ *= *('[^']+'|\"[^\"]+\")") + m = c.search(content) + if m is None: + return "unknown" + return m.groups()[0][1:-1]