from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import seaborn as sns
import statsmodels
from IPython.display import display, HTML
from plotly.subplots import make_subplots

# import the custom functions for this project
import cytox.core as ctx

# Inject a small stylesheet that widens the notebook container, menu bar and
# toolbar so that wide tables and figures fit on screen.
notebook_css = """
<style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>
"""
display(HTML(data=notebook_css))

# Lift the pandas display limits so dataframes are rendered without
# truncating rows or columns.
# NOTE(review): 'display.max_Columns' is not the canonical spelling
# ('display.max_columns'); it is kept as written - confirm pandas resolves it
# as intended.
for display_option in ('display.max_rows', 'display.max_Columns'):
    pd.set_option(display_option, None)

# Importing data.
# The paths are assembled with pathlib so the notebook runs on any operating
# system (the former raw strings with back-slashes only resolved on Windows).
raw_data_dir = Path("..") / "data" / "raw_data"

# 30 min incubation.
# skiprows=9: per the original note, this export needs 9 leading lines
# skipped instead of 8.
file_path_30 = raw_data_dir / "measurement1" / "single_cell_data.txt"
df_30min = pd.read_csv(file_path_30, sep='\t', skiprows=9)

# 16 h incubation (same note: 9 leading lines are skipped instead of 8).
file_path_16 = raw_data_dir / "measurement2" / "single_cell_data.txt"
df_16h = pd.read_csv(file_path_16, sep='\t', skiprows=9)

# Tag every row with the measurement it came from, so the two tables can be
# told apart after they are combined.
df_30min.loc[:, "Incubation"] = "30 min"
df_16h.loc[:, "Incubation"] = "16 h"

# Preview both tables (rendered as cell output in the notebook).
df_30min.head()

df_16h.head()

# Combine the two measurements into one dataframe and tidy up its columns.
# ignore_index=True gives every row a unique label; without it the row
# labels of each file would be repeated in the combined frame.
df = pd.concat([df_30min, df_16h], ignore_index=True)

# Map the numbering of the plate rows to the letters shown on the plate.
ctx.map_num_to_letter(df, col='Row', inplace=True)

# Shift the row index by one so that it starts at 1.
df.index = df.index + 1

# The column number has to be a string before it can be joined to the row
# letter.
df['Column'] = df['Column'].astype(str)

# Build the well ID from row and column (e.g. "B" and "8" -> "B8").
# This is a vectorised concatenation instead of a row-wise apply, which is
# slow on single-cell tables. To insert a separator (underscore, dot, ...)
# add it between the two operands; swap the operands to change the order.
df['Plate format'] = df['Row'].astype(str) + df['Column']

# Keep only the columns needed downstream, in the desired order.
# NOTE(review): the 16 h preview in this notebook does not list the three
# 'Nuclei - ... nuclei' classification columns, so they would be NaN for all
# 16 h rows after the concat - confirm this is intended.
columns_to_keep = [
    'Plate format',
    'Incubation',
    'Nuclei - Intensity Nucleus Alexa 488 Mean',
    'Nuclei - Intensity Nucleus Alexa 488 Median',
    'Nuclei - Intensity Nucleus Alexa 568 Mean',
    'Nuclei - Intensity Nucleus Alexa 568 Median',
    'Nuclei - Green nuclei',
    'Nuclei - Red nuclei',
    'Nuclei - Red and Green nuclei',
]

# Combining the wells of the same samples: work on an independent copy so
# that adding the 'Sample' column afterwards does not write into a slice of
# the full table.
df = df[columns_to_keep].copy()

# Sample names follow the pattern
# NameOfCompound_Concentration_DurationOfIncubation.
#
# Each entry is (plate row, first column, last column, sample name).
# ctx.well_ID_generator is called with the same row as start and end row;
# presumably it returns the well IDs of that range (e.g. "B2", "B3") -
# verify against cytox.core.
plate_layout = [
    # --------------30 min compound incubation----------------------------
    ("B", 2, 3, "Staurosporine_1uM_6h"),
    ("B", 4, 5, "Staurosporine_0.3uM_6h"),
    ("B", 6, 7, "Staurosporine_0.1uM_6h"),

    ("C", 2, 3, "Saponin_9uM_6h"),
    ("C", 4, 5, "Saponin_3uM_6h"),
    ("C", 6, 7, "Saponin_1uM_6h"),

    ("D", 2, 3, "Verteporfin_6uM_30min"),
    ("D", 4, 5, "Verteporfin_2uM_30min"),
    ("D", 6, 7, "Verteporfin_0.6uM_30min"),

    ("E", 2, 3, "Digoxin_6uM_30min"),
    ("E", 4, 5, "Digoxin_2uM_30min"),
    ("E", 6, 7, "Digoxin_0.6uM_30min"),

    ("F", 2, 3, "Saponin_9uM_30min"),
    ("F", 4, 5, "Saponin_3uM_30min"),
    ("F", 6, 7, "Saponin_1uM_30min"),

    ("G", 2, 3, 'DMSO_100%_30min'),
    ("G", 4, 5, 'DMSO_30%_30min'),
    ("G", 6, 7, 'DMSO_10%_30min'),

    # --------------16h compound incubation-------------------------------
    # Staurosporine and Saponin are only needed to prove that the experiment
    # works, so they are not repeated for this incubation time.
    ("D", 8, 9, "Verteporfin_6uM_16h"),
    ("E", 8, 9, "Verteporfin_2uM_16h"),
    ("F", 8, 9, "Verteporfin_0.6uM_16h"),

    ("D", 10, 11, "Digoxin_6uM_16h"),
    ("E", 10, 11, "Digoxin_2uM_16h"),
    ("F", 10, 11, "Digoxin_0.6uM_16h"),
]

# Label every measured cell with the sample of the well it belongs to.
# The first assignment creates the 'Sample' column; rows whose well is not
# listed keep a missing value there.
for plate_row, first_col, last_col, sample_name in plate_layout:
    sample_wells = ctx.well_ID_generator(
        plate_row, plate_row, first_col, last_col
    )
    in_sample_wells = df['Plate format'].isin(sample_wells)
    df.loc[in_sample_wells, 'Sample'] = sample_name

# DMSO controls of the 16 h incubation, listed well by well.
dmso_16h_wells = {
    'DMSO_100%_16h': ["B8", "C8"],
    'DMSO_30%_16h': ["G8", "G9"],
    'DMSO_10%_16h': ["G10"],
}
for control_name, control_wells in dmso_16h_wells.items():
    df.loc[df['Plate format'].isin(control_wells), 'Sample'] = control_name

# Also add a hypothetical well, "G11". It is an empty well that exists only
# for the sake of the figures; without it the graphs cannot be drawn 6 by 6.
placeholder_labels = {
    "Plate format": "G11",
    "Sample": "DMSO_10%_16h",
    "Incubation": "16 h",
}

# Every column that is not one of the labels above is filled with 0.
new_row = dict(placeholder_labels)
new_row.update(
    {name: 0 for name in df.columns if name not in placeholder_labels}
)

# Append the placeholder as a one-row dataframe and renumber the index.
new_row_df = pd.DataFrame([new_row])
df = pd.concat([df, new_row_df], ignore_index=True)

# Remove the rows with NaN in Sample: those wells were measured but had no
# sample in them.
df = df.dropna(subset=["Sample"])
# To inspect the rows with missing values in a column instead, use:
# df.loc[df["COL_NAME"].isnull()]

# Quick look at the cleaned table (rendered as cell output in the notebook).
df.head(2)

# Sanity check: only the two incubation labels should be present.
df["Incubation"].unique()
# Output recorded in the notebook (kept as a comment; as a bare statement it
# would raise NameError because `array` is not defined in this file):
# array(['30 min', '16 h'], dtype=object)

# Split the cleaned table into one dataframe per incubation time.
df_30min = df.loc[df["Incubation"] == "30 min"]
df_16h = df.loc[df["Incubation"] == "16 h"]

# Order in which the samples should appear on the plots later on.
samples_order_30min = [
    "DMSO_10%_30min", "DMSO_30%_30min", "DMSO_100%_30min",
    "Staurosporine_0.1uM_6h", "Staurosporine_0.3uM_6h",
    "Staurosporine_1uM_6h", "Saponin_1uM_6h", "Saponin_3uM_6h",
    "Saponin_9uM_6h", "Saponin_1uM_30min", "Saponin_3uM_30min",
    "Saponin_9uM_30min", "Verteporfin_0.6uM_30min",
    "Verteporfin_2uM_30min", "Verteporfin_6uM_30min",
    "Digoxin_0.6uM_30min", "Digoxin_2uM_30min", "Digoxin_6uM_30min",
]

samples_order_16h = [
    "DMSO_10%_16h", "DMSO_30%_16h", "DMSO_100%_16h",
    "Verteporfin_0.6uM_16h", "Verteporfin_2uM_16h",
    "Verteporfin_6uM_16h", "Digoxin_0.6uM_16h", "Digoxin_2uM_16h",
    "Digoxin_6uM_16h",
]

# Work on independent copies so the per-incubation tables are not slices of
# the combined dataframe.
df_30min = df_30min.copy()
df_16h = df_16h.copy()

# Re-order the rows of each table by the sample order defined above.
# NOTE(review): the exact sorting behaviour is defined in
# cytox.core.sample_order_sorter - verify there.
df_30min = ctx.sample_order_sorter(
    df_30min, samples_order_30min, "Sample"
)
df_16h = ctx.sample_order_sorter(
    df_16h, samples_order_16h, "Sample"
)

# Export the cleaned tables. The output path is built with pathlib so it
# resolves on every operating system (the former back-slash strings were
# Windows-only), and the directory is created if it does not exist yet.
cleaned_data_dir = Path("..") / "data" / "cleaned_data"
cleaned_data_dir.mkdir(parents=True, exist_ok=True)

df_30min.to_csv(cleaned_data_dir / "df_30min.csv", index=False)
df_16h.to_csv(cleaned_data_dir / "df_16h.csv", index=False)

	Row	Column	Timepoint	Field	Object No	X	Y	Bounding Box	Position X [µm]	Position Y [µm]	Compound	Concentration	Cell Type	Cell Count	Nuclei - Nucleus Area [µm²]	Nuclei - Nucleus Roundness	Nuclei - Intensity Nucleus Alexa 488 Mean	Nuclei - Intensity Nucleus Alexa 488 Median	Nuclei - Intensity Nucleus Alexa 568 Mean	Nuclei - Intensity Nucleus Alexa 568 Median	Nuclei - Green nuclei	Nuclei - Red nuclei	Nuclei - Red and Green nuclei	Unnamed: 23	Incubation
0	2	2	1	1	1	299	13	[281,0,329,27]	-68.20	153.28	NaN	NaN	NaN	NaN	82.9212	0.738890	558.311	535	715.520	601	1	1	1	NaN	30 min
1	2	2	1	1	2	789	13	[773,0,825,27]	75.13	153.38	NaN	NaN	NaN	NaN	95.2496	0.774948	391.678	381	284.929	278	0	0	0	NaN	30 min
2	2	2	1	1	3	864	15	[841,0,880,30]	93.03	152.63	NaN	NaN	NaN	NaN	81.4857	0.899973	423.839	404	307.135	292	0	0	0	NaN	30 min
3	2	2	1	1	4	505	95	[468,60,543,129]	-9.67	129.35	NaN	NaN	NaN	NaN	341.3960	0.979878	535.545	527	341.536	337	1	0	0	NaN	30 min
4	2	2	1	1	5	840	89	[826,61,855,114]	87.14	131.58	NaN	NaN	NaN	NaN	86.8055	0.701301	442.963	427	318.589	309	0	0	0	NaN	30 min

	Row	Column	Timepoint	Field	Object No	X	Y	Bounding Box	Position X [µm]	Position Y [µm]	Compound	Concentration	Cell Type	Cell Count	Nuclei - Nucleus Area [µm²]	Nuclei - Nucleus Roundness	Nuclei - Intensity Nucleus Alexa 488 Mean	Nuclei - Intensity Nucleus Alexa 488 Median	Nuclei - Intensity Nucleus Alexa 568 Mean	Nuclei - Intensity Nucleus Alexa 568 Median	Unnamed: 23	Incubation
0	2	8	1	1	1	116	16	[93,0,155,34]	-120.59	152.50	NaN	NaN	NaN	NaN	144.5330	0.818041	358.428	351	231.344	229	NaN	16 h
1	2	8	1	1	2	268	22	[246,0,309,44]	-76.33	150.84	NaN	NaN	NaN	NaN	190.9080	0.902280	481.173	470	315.513	300	NaN	16 h
2	2	8	1	1	3	401	13	[363,0,422,27]	-42.37	153.37	NaN	NaN	NaN	NaN	112.5180	0.749322	455.647	449	315.590	301	NaN	16 h
3	2	8	1	1	4	549	14	[519,0,560,31]	0.76	151.97	NaN	NaN	NaN	NaN	72.0552	0.753797	484.098	476	332.686	329	NaN	16 h
4	2	8	1	1	5	734	21	[711,0,770,42]	58.09	151.32	NaN	NaN	NaN	NaN	153.8250	0.880566	406.104	399	307.142	300	NaN	16 h

Importing packages and libraries

Importing and organizing data

Defining controls and samples

Making separate dataframes

Export the data to csv