Analyzing SEC-SAXS data

The following examples show how to carry out analysis on SEC-SAXS data.

Finding and setting buffer and sample regions

import glob
import os
import bioxtasraw.RAWAPI as raw

#Load the individual profiles (sorted by file name) and combine them into
#a series
profile_names = sorted(glob.glob('./series_data/sec_sample_1/profile_001_*.dat'))
profiles = raw.load_profiles(profile_names)
series = raw.profiles_to_series(profiles)

#Find an appropriate buffer range for subtraction
success, start_idx, end_idx = raw.find_buffer_range(series)

#Stop if the automated search failed, rather than setting a range that
#was never found
if not success:
    raise RuntimeError('Could not automatically find a buffer range.')

#Set the buffer range for the series. Ranges are passed as a list of
#[start, end] pairs, here a single pair.
buffer_range = [[start_idx, end_idx]]

(sub_profiles, rg, rger, i0, i0er, vcmw, vcmwer,
    vpmw) = raw.set_buffer_range(series, buffer_range)

#Find an appropriate sample range
success, start_idx, end_idx = raw.find_sample_range(series)

if not success:
    raise RuntimeError('Could not automatically find a sample range.')

#Set the sample range for the series
sample_range = [[start_idx, end_idx]]

sub_profile = raw.set_sample_range(series, sample_range)

#Save the analysis done to the series. exist_ok avoids a separate
#existence check before creating the output directory.
os.makedirs('./api_results', exist_ok=True)

raw.save_series(series, 'profile_series.hdf5', './api_results')

Once you have the subtracted profile generated by set_sample_range you can carry out analysis on the individual profile as in the scattering profile analysis tutorial.

Applying a linear baseline correction

import os
import bioxtasraw.RAWAPI as raw

#Load series
xyl_series = raw.load_series(['./series_data/xylanase.hdf5'])[0]

#Set buffer range
success, start, end = raw.find_buffer_range(xyl_series)

#Stop if the automated search failed, rather than setting a range that
#was never found
if not success:
    raise RuntimeError('Could not automatically find a buffer range.')

(sub_profiles, rg, rger, i0, i0er, vcmw, vcmwer,
    vpmw) = raw.set_buffer_range(xyl_series, [[start, end]])

#Baseline start and end ranges, defined once so that the validation and
#the correction below always use the same regions
bl_start_range = [0, 10]
bl_end_range = [1132, 1142]

#Validate baseline range. The result is informational only; it does not
#gate the correction below.
(lin_valid, lin_valid_results, lin_similarity_results, lin_svd_results,
    lin_intI_results, lin_other_results) = raw.validate_baseline_range(
    xyl_series, bl_start_range, bl_end_range, 'Linear')

#Do baseline correction
(lin_bl_cor_profiles, lin_rg, lin_rger, lin_i0, lin_i0er, lin_vcmw, lin_vcmwer,
    lin_vpmw, lin_bl_corr, lin_fit_results) = raw.set_baseline_correction(
    xyl_series, bl_start_range, bl_end_range, 'Linear')

#Find an appropriate sample range, using the baseline corrected profiles
success, start_idx, end_idx = raw.find_sample_range(xyl_series,
    profile_type='baseline')

if not success:
    raise RuntimeError('Could not automatically find a sample range.')

#Set the sample range for the series
sample_range = [[start_idx, end_idx]]

sub_profile = raw.set_sample_range(xyl_series, sample_range,
    profile_type='baseline')

#Save the analysis done to the series. exist_ok avoids a separate
#existence check before creating the output directory.
os.makedirs('./api_results', exist_ok=True)

raw.save_series(xyl_series, 'xyl_series.hdf5', './api_results')

Note that setting a buffer range is only necessary if buffer subtraction has not already been performed on the series.

Applying an integral baseline correction

import os
import bioxtasraw.RAWAPI as raw

#Load series
series = raw.load_series(['./series_data/baseline.hdf5'])[0]

#Find baseline range
(start_found, end_found, start_range,
    end_range) = raw.find_baseline_range(series)

#Stop if either end of the baseline could not be found, rather than
#correcting with a range that was never found
if not (start_found and end_found):
    raise RuntimeError('Could not automatically find a baseline range.')

#Do baseline correction
(int_bl_cor_profiles, int_rg, int_rger, int_i0, int_i0er, int_vcmw,
    int_vcmwer, int_vpmw, int_bl_corr,
    int_fit_results) = raw.set_baseline_correction(series, start_range,
    end_range, 'Integral')

#Find an appropriate sample range, using the baseline corrected profiles
success, start_idx, end_idx = raw.find_sample_range(series,
    profile_type='baseline')

if not success:
    raise RuntimeError('Could not automatically find a sample range.')

#Set the sample range for the series
sample_range = [[start_idx, end_idx]]

sub_profile = raw.set_sample_range(series, sample_range,
    profile_type='baseline')

#Save the analysis done to the series. exist_ok avoids a separate
#existence check before creating the output directory.
os.makedirs('./api_results', exist_ok=True)

raw.save_series(series, 'profile_series_bl.hdf5', './api_results')

Validating buffer and sample regions

You can validate whatever buffer or sample region you want to set. Note that this validation is done as part of the find_buffer_range and find_sample_range functions, so there’s no need to do it on regions found with those functions.

import bioxtasraw.RAWAPI as raw

#Load series
xyl_series = raw.load_series(['./series_data/xylanase.hdf5'])[0]

#Validate buffer region. Ranges are passed as a list of [start, end]
#pairs, here two separate regions.
buffer_range = [[180, 240], [500, 560]]

(valid, similarity_results, svd_results,
    intI_results) = raw.validate_buffer_range(xyl_series, buffer_range)

#Fall back to an automatically determined range if the chosen one is
#not valid
if not valid:
    success, start, end = raw.find_buffer_range(xyl_series)

    if not success:
        raise RuntimeError('Could not automatically find a buffer range.')

    buffer_range = [[start, end]]

#Set whichever buffer range was settled on above. buffer_range must be
#used here: start and end are only defined when the fallback ran.
(sub_profiles, rg, rger, i0, i0er, vcmw, vcmwer,
    vpmw) = raw.set_buffer_range(xyl_series, buffer_range)

#Validate sample region
sample_range = [[785, 815]]

(valid, similarity_results, param_results, svd_results,
    sn_results) = raw.validate_sample_range(xyl_series, sample_range)

#Fall back to an automatically determined range if the chosen one is
#not valid
if not valid:
    success, start_idx, end_idx = raw.find_sample_range(xyl_series)

    if not success:
        raise RuntimeError('Could not automatically find a sample range.')

    sample_range = [[start_idx, end_idx]]

#Set whichever sample range was settled on above
sub_profile = raw.set_sample_range(xyl_series, sample_range)

Validating baseline regions

You can validate baseline regions. Note that this validation is done as part of find_baseline_range for an integral baseline, so it is not necessary in that case. Also, the linear baseline validation is not terribly useful at the moment, as it almost always returns invalid.

import bioxtasraw.RAWAPI as raw

#Load the series to check
baseline_series = raw.load_series(['./series_data/baseline.hdf5'])[0]

#Validate linear baseline range
linear_start = [0, 10]
linear_end = [953, 963]

linear_check = raw.validate_baseline_range(baseline_series, linear_start,
    linear_end, 'Linear')

(valid, valid_results, similarity_results, svd_results, intI_results,
    other_results) = linear_check

#Validate integral baseline range (no baseline type is passed to this call).
#The results replace those of the linear validation above.
integral_start = [539, 568]
integral_end = [817, 846]

integral_check = raw.validate_baseline_range(baseline_series, integral_start,
    integral_end)

(valid, valid_results, similarity_results, svd_results, intI_results,
    other_results) = integral_check

Carrying out SVD, EFA, and REGALS

You can carry out SVD, EFA, and REGALS from the API (though without the GUI you have to know what the appropriate ranges are for each EFA component as input).

import bioxtasraw.RAWAPI as raw

# Load data
phehc_series = raw.load_series(['./series_data/phehc_sec.hdf5'])[0]

#Do SVD
svd_results = raw.svd(phehc_series)
svd_s, svd_U, svd_V = svd_results

#Do EFA, with one [start, end] range per component
efa_ranges = [[149, 197], [164, 321], [320, 364]]

efa_results = raw.efa(phehc_series, efa_ranges)
efa_profiles, efa_converged, efa_conv_data, efa_rotation_data = efa_results

# Do REGALS

def _simple_profile_settings():
    """Return a new settings dict for a 'simple' profile regularizer."""
    return {
        'type': 'simple',
        'lambda': 0.0,
        'auto_lambda': True,
        'kwargs': {},
    }


def _smooth_conc_settings(lam, auto_lam, x_lo, x_hi):
    """Return a new settings dict for a 'smooth' concentration regularizer.

    lam and auto_lam fill the 'lambda' and 'auto_lambda' entries, and
    x_lo and x_hi fill the 'xmin' and 'xmax' entries of 'kwargs'.
    """
    return {
        'type': 'smooth',
        'lambda': lam,
        'auto_lambda': auto_lam,
        'kwargs': {
            'xmin': x_lo,
            'xmax': x_hi,
            'Nw': 50,
            'is_zero_at_xmin': True,
            'is_zero_at_xmax': True,
        },
    }


#One (profile settings, concentration settings) pair per component. Only
#the second component uses a fixed lambda instead of an automatic one.
comp_settings = [
    (_simple_profile_settings(), _smooth_conc_settings(1.0, True, 145, 195)),
    (_simple_profile_settings(), _smooth_conc_settings(3.0e3, False, 160, 325)),
    (_simple_profile_settings(), _smooth_conc_settings(1.0, True, 320, 383)),
]

regals_results = raw.regals(phehc_series, comp_settings)

(regals_profiles, regals_ifts, concs, reg_concs, mixture, params,
    residual) = regals_results