|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": 2, |
| 6 | + "metadata": {}, |
| 7 | + "outputs": [], |
| 8 | + "source": [ |
| 9 | + "import numpy as np\n", |
| 10 | + "import matplotlib.pyplot as plt\n", |
| 11 | + "import pandas as pd\n", |
| 12 | + "import seaborn as sns" |
| 13 | + ] |
| 14 | + }, |
| 15 | + { |
| 16 | + "cell_type": "code", |
| 17 | + "execution_count": 3, |
| 18 | + "metadata": {}, |
| 19 | + "outputs": [], |
| 20 | + "source": [ |
| 21 | + "t2g = pd.read_csv('t2gmap.csv', header=0)\n", |
| 22 | + "encode = pd.read_csv('ENCFF068NRZ.tsv', sep='\\t', header=0)" |
| 23 | + ] |
| 24 | + }, |
| 25 | + { |
| 26 | + "cell_type": "code", |
| 27 | + "execution_count": 4, |
| 28 | + "metadata": {}, |
| 29 | + "outputs": [], |
| 30 | + "source": [ |
| 31 | + "for index, row in encode.iterrows():\n", |
| 32 | + " target_id = encode.iloc[index]['gene_id']\n", |
| 33 | + " encode.at[index, 'gene_id'] = target_id.split('.')[0]" |
| 34 | + ] |
| 35 | + }, |
| 36 | + { |
| 37 | + "cell_type": "code", |
| 38 | + "execution_count": 5, |
| 39 | + "metadata": {}, |
| 40 | + "outputs": [], |
| 41 | + "source": [ |
| 42 | + "target_ids = []\n", |
| 43 | + "for i in range(18):\n", |
| 44 | + " tf = 'KLF' + str(i+1)\n", |
| 45 | + " if tf == 'KLF13':\n", |
| 46 | + " target_ids.append(t2g[t2g['ext_gene']==tf]['ens_gene'].to_list()[2])\n", |
| 47 | + " else: \n", |
| 48 | + " target_ids.append(t2g[t2g['ext_gene']==tf]['ens_gene'].to_list()[0])" |
| 49 | + ] |
| 50 | + }, |
| 51 | + { |
| 52 | + "cell_type": "code", |
| 53 | + "execution_count": 6, |
| 54 | + "metadata": {}, |
| 55 | + "outputs": [ |
| 56 | + { |
| 57 | + "name": "stderr", |
| 58 | + "output_type": "stream", |
| 59 | + "text": [ |
| 60 | + "<ipython-input-6-b06263ed169c>:3: SettingWithCopyWarning: \n", |
| 61 | + "A value is trying to be set on a copy of a slice from a DataFrame.\n", |
| 62 | + "Try using .loc[row_indexer,col_indexer] = value instead\n", |
| 63 | + "\n", |
| 64 | + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", |
| 65 | + " subset['ext_gene']=subset['gene_id'].map(t2g_dict)\n" |
| 66 | + ] |
| 67 | + } |
| 68 | + ], |
| 69 | + "source": [ |
| 70 | + "t2g_dict = dict(zip(t2g['ens_gene'], t2g['ext_gene']))\n", |
| 71 | + "subset = encode[encode['gene_id'].isin(target_ids)]\n", |
| 72 | + "subset['ext_gene']=subset['gene_id'].map(t2g_dict)\n", |
| 73 | + "subset = subset.sort_values(by='ext_gene')\n", |
| 74 | + "subset['klf-number']=[1,10,11,12,13,14,15,16,17,18,2,3,4,5,6,7,8,9]\n", |
| 75 | + "subset = subset.sort_values(by='klf-number')" |
| 76 | + ] |
| 77 | + }, |
| 78 | + { |
| 79 | + "cell_type": "code", |
| 80 | + "execution_count": 7, |
| 81 | + "metadata": {}, |
| 82 | + "outputs": [], |
| 83 | + "source": [ |
| 84 | + "lower_bound = np.asarray(subset['TPM']-subset['TPM_ci_lower_bound'])\n", |
| 85 | + "lower_bound = lower_bound*(lower_bound>0)\n", |
| 86 | + "upper_bound = np.asarray(subset['TPM_ci_upper_bound']-subset['TPM'])\n", |
| 87 | + "upper_bound = upper_bound*(upper_bound>0)" |
| 88 | + ] |
| 89 | + }, |
| 90 | + { |
| 91 | + "cell_type": "code", |
| 92 | + "execution_count": 8, |
| 93 | + "metadata": {}, |
| 94 | + "outputs": [ |
| 95 | + { |
| 96 | + "data": { |
| 97 | + "image/png": "iVBORw0KGgoAAAANSUhEUgAAALoAAABkCAYAAAAxOiquAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAIkElEQVR4nO3df4wcZR3H8ffXWKhCaAVqsaBeqWdNzoCtxB80BFFrj7tD+4eg1qSCiSU9SbRFkWDVFfFHQ1oLxm4kNUKk/EGCVnttLjaaHk00sSU9osQfZ2mLeAq9WBEL0tZ+/OOZ2+zNze5db2d3Z3e+r2Sy3Xlmnnlu9jvPPn2enXlMEs61u1c1uwDONYIHussFD3SXCx7oLhc80F0ueKC7XGhqoHd3dwvwxZc0l0RNDfSxsbFmHt7liDddXC5kOtALhQJmVloKhUKzi+Ra1KubXYBKisUi8+fPp7OzE4B169Y1uUSulWW6RncuLR7oLhcyHegDAwOMjIwwMjJCf38/AwMDzS6Sa1GZbaMD9PX10dfX1+xiuDaQ6RrdubSkUqObWRewAlgIHAAuBs6TdHca+TtXq1RqdElPAc8BlwHvkLQJwMzmxrc1szVmdsDMDhw7diyNwzs3pdSaLpK2A9uAjvFVFbZ7QNJVkq6aN29eWod3rqq0mi7dwBXAImCHmd0OIOlfaeTvXK1SCXRJg8BgGnk5Vw/e6+JywQPd5YIHussFD3SXCx7oLhc80F0ueKC7XPBAd7ngge5ywQPd5YIHussFD3SXCx7oLhcq/nrRzIaBvcAZAEnrq2x7DXA10An8inCn0RzgS/K5Y1wGVKvRPwMcB44AX62WiaR9kjZG294o6ZvA74Er49v6HUauGaoF+jDwF8LtcRdMlZGZrQIOE4K9Ir/DyDVDtUD/I/Be4G/AR6tlYmY3AauBecCwmd0FdAFPplRO52pS7Q6j9wOzgFNTZSLpUeDRtArlXNqqBfoXo9dhSdsaURjn6qVa0+VpSbcxjfa5c1lXLdC/YGabgaXRq3Mtq1rT5X5gK2ANKotzdVMt0K8FziEEugB/vJxrWdUCfVDSfQ0riXN1VK2N/v2GlaIOfP4jV65ijS7pdCMLkiaf/8jFte2vF322DFcu0zNe1MJny3Dl2rZGd66cB7rLBQ90lwtpTQTwVuAuYAdwEliC32HkMiStOYz+DDwYvV3udxi5rGl406Ud7jDK6mBUVsuVBWk1XS4h3IX0GuDx6A6jOcCP08g/awqFAnv37gUovWZBVsuVBWnNYfQP4LY08sq6YrEIwOjo6IT3a9eubVqZysuRtXJlhfe6zICPuraeth0ZradmjLoWi0UGBgbYvXt3aV1PTw+7du0qvR+/AAH6+/vp6enxGj3igd5G/GcPlXmgtxAP5JnzNrrLBQ/0jCgWi/T29k7oB+/t7W12sdqGB7oD2n+wydvoGdLMNni7DzZ5je5KzaahoSGGhoZSbzZl4dvCa3QH1O/bpFgssn///gnr4u8bwQPd1V0WukW96eJq0iq9RakHupktNbN7zGyzmZ2Xdv71VusHl4X2qJusHjX6J4AC4W6j5XXIvybFYpHOzs4JwTj+/Jc08p6qPdqOF0JfXx9bt24tLWfTTGnU+bC073Qzs3uBO4FlwIWSdsTS1wBroreLgT9NI9uLgbHYusXA+WXv/1Mhr6R9p5NWa3qltAXAG8re/x0YbUC5ajlfaZzrSulTnY/pHnvcmKTuSWslpboA7wS+AWwCzk8pzwMzSavnvl6ubOU91ZJ6r4ukJ4An0s7XuVp4r4vLhVYJ9AdmmFbPfeuZt5fr7NOrSv0/o85lUavU6M7VJNM/ASh/Aphi3ZRR+jXA1UAncIekf5aldQErgIXA1yVN6vYys17gVkkfTkhbCVxHmA37PpV99ZnZQuBThK6uH0h6MbbvtYSnla0EbpZ0pCztBsJExXOBTZIOxfa9HngXcBFwp6SX4ueC2NPQor9/PG00+ndB0nDCvqeBtwOXA5+VdCqW/lx0TjuAddF2pc/AzFYD10m6JSHvjqhMz0r6YUL6s4TP5GVJm2Np/wMWAR8DPijpRCx9PnAh8BZgvaQXOAuZrtE18QlgSen7JG0kTMs+N5b2FOFDu4yESYHNbAkwG3i6QvYngBcJkwrHz9OtwPhFNSlvSUPA94A/lAd55GXCB/Za4PmE43YD9xL6iksDbqryNLTyNEm/JQQGSftKGpD0HeAVwhxV8fTfRH/T64Ez5Wlmthw4CryQlDdwnHARzq6Q/ulo39NmZrHj7gQeAvZJOpGw70uEmckB/p1w3qrKdKBPh5mtAg5LmhSwkrYD24A3Jex6PfBGYEkU9PF990jaABwiTFxWbjbwS+B3QKXfB6wEfp6wfjHhGTg/SsgXwgXSD7yNaczaPRNm9jlg53hAxUm6H9hDuCDLfYDwTbLEzBYl7PeQpG8D55rZ5QlZX0II3FeApQnptxDOS5KLJK0HHge6KmxTUdabLqUngJnZQUlHY+k3AauBQTN7c3m6mXUDVxC+Dr8Wz1vSt6LtOiQdTDj2+4B3E5o+X44lPxgddxZhYCzJCkLNHzcGbCB8xX83If3c6PV54Bdl5an4NLTYeToOfAjoMrOjko7H0q8kXERmZvsT0ucQavNOYHvsuHdLOhqds0PxckX7LgAuJTRT4uX+GXA78Lp43mZ2EFgsqTSnbWzfZ8zsDsI39E8qnPOKvNfF5ULLN12cmw4PdJcLHuguFzzQXS5kutel1ZnZzcAw8B5C3+85wHDZQM4Q8FPg11H/d03HGc/XTeaBXn8bCCOr+6KATFKa48bMHgF2Acsk9ZvZFkmfN7MthIGgVYTBrDFCV+ST0a6rzOxGQt/9pYQuxAuAjcDDwCPAY5L+m+6f1xq86VJ/RwiDLEkOStoi6XDZumPRQNfJ2LYWve4hDCqdJNyyuCxaPwjcA3wkWkYJF8NC4JCk7XkNcvAavREeBhaY2VeAvwKfjAajKs0eEB/YmGVmHycM4kAYLT0DnJJ0xszGK6tuwsjlTsKtaR2EYfNnou1zzQeMXC5408Xlgge6ywUPdJcLHuguFzzQXS54oLtc8EB3ufB/TRDNV7fb9UsAAAAASUVORK5CYII=\n", |
| 98 | + "text/plain": [ |
| 99 | + "<Figure size 192x86.4 with 1 Axes>" |
| 100 | + ] |
| 101 | + }, |
| 102 | + "metadata": { |
| 103 | + "needs_background": "light" |
| 104 | + }, |
| 105 | + "output_type": "display_data" |
| 106 | + } |
| 107 | + ], |
| 108 | + "source": [ |
| 109 | + "fig, ax = plt.subplots(figsize=(4/1.5,3/2.5))\n", |
| 110 | + "fontsize=6\n", |
| 111 | + "plt.bar(x=subset['klf-number'], height=subset['TPM'], color='#969696')\n", |
| 112 | + "plt.errorbar(x=subset['klf-number'], y=subset['TPM'], yerr=[lower_bound, upper_bound], fmt='none', c='black', capsize=2)\n", |
| 113 | + "plt.xticks([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18])\n", |
| 114 | + "ax.set_xticklabels([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18], fontsize = fontsize)\n", |
| 115 | + "plt.ylabel('TPM', fontsize=fontsize)\n", |
| 116 | + "plt.xlabel('KLF number', fontsize=fontsize)\n", |
| 117 | + "plt.tick_params(axis='both', labelsize=fontsize)\n", |
| 118 | + "ax.spines['top'].set_visible(False)\n", |
| 119 | + "ax.spines['right'].set_visible(False)\n", |
| 120 | + "plt.savefig('KLF_expression.pdf', dpi=300)" |
| 121 | + ] |
| 122 | + }, |
| 123 | + { |
| 124 | + "cell_type": "code", |
| 125 | + "execution_count": 9, |
| 126 | + "metadata": {}, |
| 127 | + "outputs": [], |
| 128 | + "source": [ |
| 129 | + "target_ids = []\n", |
| 130 | + "for i in range(9):\n", |
| 131 | + " tf = 'SP' + str(i+1)\n", |
| 132 | + " target_ids.append(t2g[t2g['ext_gene']==tf]['ens_gene'].to_list()[0])" |
| 133 | + ] |
| 134 | + }, |
| 135 | + { |
| 136 | + "cell_type": "code", |
| 137 | + "execution_count": 10, |
| 138 | + "metadata": {}, |
| 139 | + "outputs": [ |
| 140 | + { |
| 141 | + "name": "stderr", |
| 142 | + "output_type": "stream", |
| 143 | + "text": [ |
| 144 | + "<ipython-input-10-b44ec6e27bb2>:3: SettingWithCopyWarning: \n", |
| 145 | + "A value is trying to be set on a copy of a slice from a DataFrame.\n", |
| 146 | + "Try using .loc[row_indexer,col_indexer] = value instead\n", |
| 147 | + "\n", |
| 148 | + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", |
| 149 | + " subset['ext_gene']=subset['gene_id'].map(t2g_dict)\n" |
| 150 | + ] |
| 151 | + } |
| 152 | + ], |
| 153 | + "source": [ |
| 154 | + "subset = encode[encode['gene_id'].isin(target_ids)]\n", |
| 155 | + "t2g_dict = dict(zip(t2g['ens_gene'], t2g['ext_gene']))\n", |
| 156 | + "subset['ext_gene']=subset['gene_id'].map(t2g_dict)\n", |
| 157 | + "subset = subset.sort_values(by='ext_gene')\n", |
| 158 | + "subset['sp-number']=[1,2,3,4,5,6,7,8,9]\n", |
| 159 | + "subset = subset.sort_values(by='sp-number')" |
| 160 | + ] |
| 161 | + }, |
| 162 | + { |
| 163 | + "cell_type": "code", |
| 164 | + "execution_count": 11, |
| 165 | + "metadata": {}, |
| 166 | + "outputs": [ |
| 167 | + { |
| 168 | + "data": { |
| 169 | + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAHcAAABkCAYAAAC8e6+/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAGrUlEQVR4nO2db4hdxRmHn59Y/JNUJWTFSpsajBoUrEmXSquSpSBu3FsJNC20H+InYxMFY5oSaQN701pLkWwTg3s1Bg2FfhGKf1hKUBq2tIiyK1nbSlvvGhPESJNNm2Laalvy9sPc3exuzjnJZufeczP7PjDcvWfmvvPe/TFz58yZeUdmhpMmF5TtgNM8XNyEcXETxsVNGBc3YVzchClV3O7ubgM8zS7lUqq4Y2NjZVafPN4tJ0yS4larVSRNpGq1WrZLpaAypx87OztteHi4Kba7uroAGBwcbIr9NkJ5GRe20otmU6vVJv4+fPjwlGvr1q0rxacySbJbdgJJijswMEC9Xqder7N+/XoGBgbKdqkUonTLkhYD9wIngKPAQmCemf0whv2ZUqlUqFQqZVTdVsRqufcDf2v8fYuZbQOQdMX0gpLWShqWNHz06NFI1TtZxBL3YuDXwB+AVY1rmcNwM9tlZp1m1tnR0RGpeieLWKPlPcAa4FNAVdJ3AczseCT7zjkQRVwzGwFGYthy4pHkaNkJuLgJ4+ImjIubMC5uwri4CePiJoyLmzAubsK4uAnj4iaMi5swLm7CRFsgJ6mH8ND+l5S8EsMJRGm5kpYRHtgf4AwrMZzWkdtyJY0Ag8BJADPbWGBnJfAvYNl4eXJWYkhaC6wFWLRo0Uz9dWZAUbd8H3A38Hfg2SIjZvYYgKRrgP1FKzHMbBewC8Ki9HNx2jk7isQdAa4HvgBcRljZWIiZbYjilROFot/cPwNfBj4AVrfGHScmRS33q4QFb/9tkS9OZIrE/V7jdcTMdrfCGScuRd3yATN7kPB765yHFIm7SVIfsLzx6pxnFHXLTwD9FOz/dNqbopa7AtjQSA+1wplarUZPT8+UXfE9PT2tqDpJilruXjPb0TJPnOgUiftky7yYhG+/jEdut2xm/2ulI058/Hluwri4CePiJoyLmzCxAp7cAXwFuA7YBywGLgc2mx+iUBpRWq6Z/dbMfgocBL5hZj8G/kh4FjwFD3jSOqJ1y5K+DbxHEDgXD3jSOmItkPsmIeBJBzAi6fvATcBbMew750asgCfPA8/HsOXEw0fLCePiJoyLmzAubsK4uAnj4iaMi5swLm7CuLgJMyfFnSvnDs05cWu1GkNDQ1OuDQ0NTTm2JhWSOlfobJntCstarUZfXx+jo6MT15YsWUK9Xo/hXjTOO3Gr1Spbt26deN/b21tKt7pxY1GggfYgercsabmkRyX1SZoX0/Zc6lJj0Izf3G8BVeBF4M7YxiuVCv39/ROpWQvYWzHoanYd0Q9qlPQ48AhwG7DAzF6clj8R8AS4AfhLjqmFQNEBu2fKn62Nq4HPTHr/IXB4BjZuAOZPen+C07/rmeo4GxtjZtadUT+YWdQEfBH4EbANmD8LO8OzyW8XG63yMytFH1CZ2ZvAm7HtOjNnzt3nziXaWdxds8xvFxut8vM0Sj352mku7dxynVnSluJKul7SHkmrcvLvkLRZ0m5JCzLyb5K0UdJOSQsL6umR9HJO3ipJOyRtkJQZF0TSYklVSZskfTojf0Xj84ON0InT878m6TFJ/ZKuzaljpaReSU9IujTvu2TRluKa2TuEkz3z8idvX7kiI/9t4K/AZ8kJkjYt0mwW/wQ+IgRay/s/TT43+LR6zOw3wE7gT2Z2MOPz/wYWAJcCR3Lq6AYeJ9zfzmhSqC3FPRvGt6+YWaY4ZvYLYDeQFxp2JfA5YFlD6Omff9XMtgDvEoK/ZDH53OC8yCyrgMzegTBJ8SDwXEEdO4H1wFJmGM2vLR8cSLqKEG/yEkn7zezQtPzx7St7JX0+I78buBm4FujNqsMmRZo1s/0ZPnQBtxJ2LP4gx9U9nDo3eFtOmbsILTyLMWALYUfkz3LKXNR4PQK8klMmEx8tJ8x52y07Z8bFTRgXN2Fc3IRpy9FyLBqj6hsJz0F/DmwF6oSN4Q+Y2cezsL3d2jzsf9LiEu5jx4B9ZnZE0idm1idpM2GCY7Rxy/N14BjwO+B2YDvQBRwHvkM4peVLwOvA7Wa2Blgq6X7gGsLKk58AhwgTHycJt1C/MrM3mv4tc0i6W7ZwvtHLwGpJ9wAXSXoYOGlmo5OK7gN2ECLyGCEM8fiU4zEzewr4GHgGOCjpMuCImT3dKDd+yMc/CKEjAF4qU1hIvOU2BF1CWArzPvCJmWVNFlgjXQC8BjxAEOkFTs0K/cfMTNLJRrkrGy1XwO8JPcTFhCg+HZw6X6k0fBIjYZLuluc6Lm7CuLgJ4+ImjIubMC5uwri4CfN/ibOBuwkDRtcAAAAASUVORK5CYII=\n", |
| 170 | + "text/plain": [ |
| 171 | + "<Figure size 115.2x86.4 with 1 Axes>" |
| 172 | + ] |
| 173 | + }, |
| 174 | + "metadata": { |
| 175 | + "needs_background": "light" |
| 176 | + }, |
| 177 | + "output_type": "display_data" |
| 178 | + } |
| 179 | + ], |
| 180 | + "source": [ |
| 181 | + "fig, ax = plt.subplots(figsize=(4/2.5,3/2.5))\n", |
| 182 | + "fontsize=6\n", |
| 183 | + "lower_bound = np.asarray(subset['TPM']-subset['TPM_ci_lower_bound'])\n", |
| 184 | + "lower_bound = lower_bound*(lower_bound>0)\n", |
| 185 | + "upper_bound = np.asarray(subset['TPM_ci_upper_bound']-subset['TPM'])\n", |
| 186 | + "upper_bound = upper_bound*(upper_bound>0)\n", |
| 187 | + "plt.bar(x=subset['sp-number'], height=subset['TPM'], color='#969696')\n", |
| 188 | + "plt.errorbar(x=subset['sp-number'], y=subset['TPM'], yerr=[lower_bound, upper_bound], fmt='none', c='black', capsize=2)\n", |
| 189 | + "plt.xticks([1,2,3,4,5,6,7,8,9])\n", |
| 190 | + "plt.ylabel('TPM', fontsize=fontsize)\n", |
| 191 | + "plt.xlabel('SP number', fontsize=fontsize)\n", |
| 192 | + "plt.tick_params(axis='both', labelsize=fontsize)\n", |
| 193 | + "ax.set_box_aspect(0.8)\n", |
| 194 | + "ax.spines['top'].set_visible(False)\n", |
| 195 | + "ax.spines['right'].set_visible(False)\n", |
| 196 | + "plt.savefig('SP_expression.pdf', dpi=300)" |
| 197 | + ] |
| 198 | + }, |
| 199 | + { |
| 200 | + "cell_type": "code", |
| 201 | + "execution_count": null, |
| 202 | + "metadata": {}, |
| 203 | + "outputs": [], |
| 204 | + "source": [] |
| 205 | + } |
| 206 | + ], |
| 207 | + "metadata": { |
| 208 | + "kernelspec": { |
| 209 | + "display_name": "Python 3", |
| 210 | + "language": "python", |
| 211 | + "name": "python3" |
| 212 | + }, |
| 213 | + "language_info": { |
| 214 | + "codemirror_mode": { |
| 215 | + "name": "ipython", |
| 216 | + "version": 3 |
| 217 | + }, |
| 218 | + "file_extension": ".py", |
| 219 | + "mimetype": "text/x-python", |
| 220 | + "name": "python", |
| 221 | + "nbconvert_exporter": "python", |
| 222 | + "pygments_lexer": "ipython3", |
| 223 | + "version": "3.9.7" |
| 224 | + } |
| 225 | + }, |
| 226 | + "nbformat": 4, |
| 227 | + "nbformat_minor": 4 |
| 228 | +} |
0 commit comments