Detecting aerosolized fluorophores with Rapid-E

Classifying bacterial-enriched bioaerosols with Rapid-E and ML

Authors

Affiliations

Alejandro Fontal

Climate & Health Program, ISGlobal

Sílvia Borràs Aiguadé

AIRLAB, Climate & Health Program, ISGlobal

Sofya Pozdniakova

AIRLAB, Climate & Health Program, ISGlobal

Lídia Cañas

AIRLAB, Climate & Health Program, ISGlobal

Department of Evolutionary Biology, Ecology and Environmental Sciences, University of Barcelona

Xavier Rodó

Climate & Health Program, ISGlobal

Catalonia Institute for Advanced Studies

Published

December 2, 2025

Preamble

Imports

import os
import sys
import bz2
import pickle

sys.path.append('..')
sys.path.append('../../aerosolpy')

import numpy as np
import pandas as pd
import seaborn as sns
import plotnine as p9
import matplotlib.pyplot as plt

from glob import glob
from tqdm.auto import tqdm
from functools import partial
from collections import defaultdict
from mizani.breaks import date_breaks
from mizani.formatters import date_format, percent_format
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

from aerosolpy.conversion import Conversion
from aerosolpy.particles import AerosolParticlesData, ParticleData
from aerosolpy.particles import (CORRECTED_SPECTRAL_WAVELENGTHS,
                                 WAVELENGTH_LIFETIME_RANGES,
                                 SCATTERING_ANGLES)

Config

# Matplotlib settings
from matplotlib_inline.backend_inline import set_matplotlib_formats
plt.rcParams['font.family'] = 'Georgia'
plt.rcParams['svg.fonttype'] = 'none'
set_matplotlib_formats('retina')
plt.rcParams['figure.dpi'] = 300
# Plotnine settings (for figures)

p9.options.set_option('base_family', 'Georgia')

p9.theme_set(
    p9.theme_bw()
    + p9.theme(panel_grid=p9.element_blank(),
               legend_background=p9.element_blank(),
               panel_grid_major=p9.element_line(size=.5, linetype='dashed',
                                                alpha=.15, color='black'),
               dpi=300
    )
)

Loading Data

for folder in glob('../data/fluorophores/*'):
    fluorophore = os.path.basename(folder)
    if os.path.exists(f'{folder}/particles.pickle.bz2'):
        continue
    else: 
        for filename in glob(f'{folder}/*.zip'):
            converter = Conversion(filename, mode='user', keep_threshold=True, keep_zip=True)
            converter.save_overall()
        particles = AerosolParticlesData.from_folder(folder)
        with bz2.BZ2File(f'{folder}/particles.pickle.bz2', 'w') as fh:
            pickle.dump(particles, fh)

particles_dict = {}
for folder in glob('../data/fluorophores/*'):
    fluorophore = os.path.basename(folder)
    with bz2.BZ2File(f'{folder}/particles.pickle.bz2', 'r') as fh:
        particles = pickle.load(fh)
    particles_dict[fluorophore] = particles

Analysis

summary_df = []
for fluorophore, particles in particles_dict.items():
    summary_df.append(particles.summary_df.assign(fluorophore=fluorophore))
summary_df = pd.concat(summary_df)

(summary_df
 .replace({'fluorophore': {'Riboflavin+acetic': 'Riboflavin\n[AcOH]'}})
 .assign(fluorescent=lambda dd: dd['intensity'] >= 2000)
 .groupby(['fluorophore'])
 .size()
 .rename('count')
 .sort_values(ascending=False)
 .apply(lambda x: int(round(x / 10)))
)

fluorophore
Tyrosine              4064
Riboflavin\n[AcOH]    3132
Tryptophan            2162
NADH                  1885
Riboflavin             853
Name: count, dtype: int64

Fluorescence rates

f = (summary_df
 .replace({'fluorophore': {'Riboflavin+acetic': 'Riboflavin\n[AcOH]'}})
 .assign(fluorescent=lambda dd: dd['intensity'] >= 2000)
 .groupby(['fluorophore', 'fluorescent'])
 .size()
 .rename('count')
 .reset_index()
 .assign(percentages=lambda dd: dd.groupby('fluorophore')['count'].transform(lambda x: x / x.sum() * 100).round(2))
 .assign(fluorophore=lambda dd: pd.Categorical(dd['fluorophore'], categories=dd.query('fluorescent').sort_values('percentages')['fluorophore']))
 .pipe(lambda dd: p9.ggplot(dd)
       + p9.aes(x='fluorophore', y='count', fill='fluorescent')
       + p9.geom_col()
       + p9.coord_flip()
       + p9.geom_text(p9.aes(label='percentages.astype(str) + "%"'),
                      nudge_y=1000, size=8, ha='left',
                       data=dd.query('fluorescent'))
       + p9.scale_y_continuous(expand=(0, 0, .1, 0))
       + p9.scale_fill_manual(values=[None, '#00FF00'])
       + p9.labs(x='', y='Total number of aerosol particles', fill='Fluorescent*',
                 caption='*Max Fluorescence Intensity >= 2000 [a.u.]')
       + p9.theme(figure_size=(4, 3), legend_position='top',
                   plot_caption=p9.element_text(size=7))
 )
       )

f.save('../output/figures/fluorophores_fluorescent_counts.svg')
f.draw()

intensity_stats = (summary_df
 .groupby('fluorophore')
 .agg(mean=('intensity', 'mean'),
      median=('intensity', 'median'),
      q95=('intensity', lambda x: np.percentile(x, 95)),
      q99=('intensity', lambda x: np.percentile(x, 99)))
      .reset_index()
)
intensity_stats

	fluorophore	mean	median	q95	q99
0	NADH	694.887509	610.0	1324.0	2360.35
1	Riboflavin	703.847867	557.0	1425.0	3403.46
2	Riboflavin+acetic	1089.236964	634.0	3793.4	8261.40
3	Tryptophan	998.281373	772.0	1983.0	5381.74
4	Tyrosine	676.762471	592.0	1284.0	2028.30

Size distributions

f = (summary_df
 .assign(intensity=lambda dd: dd.intensity.astype(int))
 .pipe(lambda dd: p9.ggplot(dd)
       + p9.geom_histogram(p9.aes(x='intensity'), binwidth=100)
       + p9.facet_wrap('fluorophore', ncol=1, scales='free_y')
       + p9.geom_vline(p9.aes(xintercept='median'), color='red', linetype='dashed',
                       data=intensity_stats)
       + p9.geom_vline(p9.aes(xintercept='q95'), color='blue', linetype='dashed',
                          data=intensity_stats)
       + p9.geom_vline(p9.aes(xintercept='q99'), color='blue', linetype='dotted',
                       data=intensity_stats)
       + p9.labs(x='Fluorescence Intensity [a.u.]', y='Particles count')
       + p9.scale_x_continuous(limits=(0, 10000))
       + p9.theme(figure_size=(4, 3.5),
                  axis_text_y=p9.element_text(size=7))
 )
)
f.save('../output/figures/fluorescence_intensity_histograms.svg')
f.draw()

fluo_particles_dict = {}
for fluorophore, particles in particles_dict.items():
    fluo_particles_dict[fluorophore] = particles.filter('intensity >= 2000')

Filtering particles with query: intensity >= 2000
Filtering particles with query: intensity >= 2000
Filtering particles with query: intensity >= 2000
Filtering particles with query: intensity >= 2000
Filtering particles with query: intensity >= 2000

super_fluo_particles_dict = {}
for fluorophore, particles in particles_dict.items():
    super_fluo_particles_dict[fluorophore] = particles.filter('intensity >= 2500')

Filtering particles with query: intensity >= 2500
Filtering particles with query: intensity >= 2500
Filtering particles with query: intensity >= 2500
Filtering particles with query: intensity >= 2500
Filtering particles with query: intensity >= 2500

Spectra for top most fluorescent particles

spectra_df = []
for fluorophore in super_fluo_particles_dict:
    for i, particle in enumerate(super_fluo_particles_dict[fluorophore]):
        spectra_df.append(particle.spectrum_time_matrix.sum(axis=1)
                  .rename('intensity')
                  .reset_index()
                  .assign(relative_intensity=lambda dd: dd['intensity'] / dd['intensity'].max())
                  .assign(fluorophore=fluorophore)
                  .assign(particle_index=i)
                  .assign(max_intensity=lambda dd: dd['intensity'].max())
 )
spectra_df = pd.concat(spectra_df)

n = 100
selected_particles = (spectra_df
 .groupby('fluorophore')
 .apply(lambda dd: dd[['particle_index', 'max_intensity']].drop_duplicates()
        .sort_values('max_intensity', ascending=False)
        .head(n).assign(new_index=range(1, n + 1)))
        .reset_index()
 [['fluorophore', 'particle_index', 'new_index']]
)


f = (spectra_df
 .merge(selected_particles, on=['fluorophore', 'particle_index'])
 .replace('Riboflavin+acetic', 'Riboflavin [AcOH]')
 .assign(fluorophore=lambda dd: pd.Categorical(dd['fluorophore'], categories= [
     'Tyrosine', 'Tryptophan', 'NADH', 'Riboflavin', 'Riboflavin [AcOH]'
 ]))
 .pipe(lambda dd: p9.ggplot(dd)
       + p9.aes(x='wavelength', y='new_index')
         + p9.geom_tile(p9.aes(fill='relative_intensity'))
         + p9.scale_x_continuous(expand=(0, 0), breaks=range(300, 601, 50))
         + p9.scale_y_continuous(expand=(0, 0), trans='reverse', breaks=[1, 50, 100])
         + p9.scale_fill_cmap('viridis', labels=percent_format())
         + p9.facet_wrap('fluorophore')
         + p9.labs(x='Wavelength [nm]', y='Particle index (top 100 most fluorescent)', fill='Relative Intensity')
         + p9.theme(
              axis_text_y=p9.element_text(size=7),
              axis_title_y=p9.element_text(size=10),
              legend_position=(.9, .05),
              legend_key_size=10,
              legend_text=p9.element_text(size=8),
              legend_title=p9.element_text(size=9, ha='center'),
              axis_title_x=p9.element_text(size=10, va='top'),
              axis_text_x=p9.element_text(size=7),
              figure_size=(6, 4),
              plot_margin=.03
         )
 )
)
f.save('../output/figures/fluorophores_fluorescence_spectra.svg')
f.draw()

f = (spectra_df
 .merge(selected_particles, on=['fluorophore', 'particle_index'])
 .replace('Riboflavin+acetic', 'Riboflavin [AcOH]')
 .assign(fluorophore=lambda dd: pd.Categorical(dd['fluorophore'], categories= [
     'Tyrosine', 'Tryptophan', 'NADH', 'Riboflavin', 'Riboflavin [AcOH]'
 ]))
 .pipe(lambda dd: p9.ggplot(dd)
       + p9.aes(x='wavelength', y='relative_intensity')
       + p9.scale_color_manual(['black', '#EE1E26', '#5EBE46', '#344CA5', "#344CA5"])
       + p9.scale_fill_manual(['black', '#EE1E26', '#5EBE46', '#344CA5', "#344CA5"])
       + p9.scale_x_continuous(breaks=range(300, 601, 50))
       + p9.scale_y_continuous(breaks=[0, .2, .4, .6, .8, 1])
       + p9.labs(y='Relative Fluorescence Intensity [a.u.]', x='Wavelength [nm]', color='', fill='')
       + p9.stat_summary(geom='line', size=.7, fun_y=np.median, mapping=p9.aes(color='fluorophore', linetype='fluorophore=="Riboflavin"'))
       + p9.stat_summary(fun_ymax=lambda x: x.quantile(.75),
                         fun_ymin=lambda x: x.quantile(.25),
                          geom='ribbon', alpha=.15, mapping=p9.aes(fill='fluorophore'))
       + p9.guides(linetype=False)
       + p9.theme(figure_size=(5, 3),
                  axis_title_y=p9.element_text(size=10),
       )
    )
       )
f.save('../output/figures/fluorophores_fluorescence_spectra_median.svg')
f.draw()

lifetimes = []
for fluorophore, particles in super_fluo_particles_dict.items():
    selected_idxs = selected_particles.query('fluorophore == @fluorophore')['particle_index']
    for i in selected_idxs:
        particle = particles[i]
        lifetimes.append(particle.lifetime
                         .assign(fluorophore=fluorophore)
                         .assign(particle_index=i)
        )
lifetimes = pd.concat(lifetimes)

lifetime_stats = (lifetimes
 .replace({'350-400 nm': '300-340 nm'})
 .groupby(['fluorophore', 'time', 'wavelength_range'], as_index=False)
 .agg(
     median=('intensity', 'median'),
    q05=('intensity', lambda x: x.quantile(.05)),
    q25=('intensity', lambda x: x.quantile(.25)),
    q75=('intensity', lambda x: x.quantile(.75)),
    q95=('intensity', lambda x: x.quantile(.95)),
 )
)

(lifetime_stats
 .replace('Riboflavin+acetic', 'Riboflavin [AcOH]')
 .pipe(lambda dd: p9.ggplot(dd)
       + p9.aes(x='time', y='median', color='wavelength_range')
       + p9.geom_line()
       + p9.facet_wrap('fluorophore', ncol=3, scales='free_y')
       + p9.labs(x='Time [ns]', y='Median Intensity [a.u.]', color='Wavelength Range')
       + p9.theme(
           legend_position=(.925, .1),
           figure_size=(6, 3.5),
           legend_title=p9.element_text(size=9),
           legend_text=p9.element_text(size=8),
           legend_key_size=12,
                  )
 )
)