Analyzing SEC-SAXS data
***********************

The following examples show how to carry out analysis on SEC-SAXS data.

Finding and setting buffer and sample regions
+++++++++++++++++++++++++++++++++++++++++++++

.. code-block:: python

    import glob
    import os

    import bioxtasraw.RAWAPI as raw

    #Load the series
    profile_names = sorted(glob.glob('./series_data/sec_sample_1/profile_001_*.dat'))
    profiles = raw.load_profiles(profile_names)

    series = raw.profiles_to_series(profiles)

    #Find an appropriate buffer range for subtraction
    success, start_idx, end_idx = raw.find_buffer_range(series)

    #Set the buffer range for the series
    buffer_range = [[start_idx, end_idx]]
    (sub_profiles, rg, rger, i0, i0er, vcmw, vcmwer,
        vpmw) = raw.set_buffer_range(series, buffer_range)

    #Find an appropriate sample range for subtraction
    success, start_idx, end_idx = raw.find_sample_range(series)

    #Set the sample range for the series
    sample_range = [[start_idx, end_idx]]
    sub_profile = raw.set_sample_range(series, sample_range)

    #Save the analysis done to the series
    if not os.path.exists('./api_results'):
        os.mkdir('./api_results')

    raw.save_series(series, 'profile_series.hdf5', './api_results')

Once you have the subtracted profile generated by ``set_sample_range`` you can
carry out analysis on the individual profile as in the scattering profile
analysis tutorial.

..
   NOTE(review): the sentence above was originally a ``:ref:`` role whose
   target label was missing. Restore the cross-reference once the label of
   the scattering profile analysis tutorial has been confirmed.

Applying a linear baseline correction
+++++++++++++++++++++++++++++++++++++

.. code-block:: python

    import os

    import bioxtasraw.RAWAPI as raw

    #Load series
    xyl_series = raw.load_series(['./series_data/xylanase.hdf5'])[0]

    #Set buffer range
    success, start, end = raw.find_buffer_range(xyl_series)
    (sub_profiles, rg, rger, i0, i0er, vcmw, vcmwer,
        vpmw) = raw.set_buffer_range(xyl_series, [[start, end]])

    #Validate baseline range
    (lin_valid, lin_valid_results, lin_similarity_results, lin_svd_results,
        lin_intI_results, lin_other_results) = raw.validate_baseline_range(
        xyl_series, [0, 10], [1132, 1142], 'Linear')

    #Do baseline correction
    (lin_bl_cor_profiles, lin_rg, lin_rger, lin_i0, lin_i0er, lin_vcmw,
        lin_vcmwer, lin_vpmw, lin_bl_corr,
        lin_fit_results) = raw.set_baseline_correction(
        xyl_series, [0, 10], [1132, 1142], 'Linear')

    #Find an appropriate sample range
    success, start_idx, end_idx = raw.find_sample_range(xyl_series,
        profile_type='baseline')

    sample_range = [[start_idx, end_idx]]
    sub_profile = raw.set_sample_range(xyl_series, sample_range,
        profile_type='baseline')

    #Save the analysis done to the series
    if not os.path.exists('./api_results'):
        os.mkdir('./api_results')

    raw.save_series(xyl_series, 'xyl_series.hdf5', './api_results')

Note that setting a buffer range is only necessary if buffer subtraction has
not already been performed on the series.

Applying an integral baseline correction
++++++++++++++++++++++++++++++++++++++++

.. code-block:: python

    import os

    import bioxtasraw.RAWAPI as raw

    #Load series
    series = raw.load_series(['./series_data/baseline.hdf5'])[0]

    #Find baseline range
    (start_found, end_found, start_range,
        end_range) = raw.find_baseline_range(series)

    #Do baseline correction
    (int_bl_cor_profiles, int_rg, int_rger, int_i0, int_i0er, int_vcmw,
        int_vcmwer, int_vpmw, int_bl_corr,
        int_fit_results) = raw.set_baseline_correction(series, start_range,
        end_range, 'Integral')

    #Set an appropriate sample range for subtraction
    success, start_idx, end_idx = raw.find_sample_range(series,
        profile_type='baseline')

    sample_range = [[start_idx, end_idx]]
    sub_profile = raw.set_sample_range(series, sample_range,
        profile_type='baseline')

    #Save the analysis done to the series
    if not os.path.exists('./api_results'):
        os.mkdir('./api_results')

    raw.save_series(series, 'profile_series_bl.hdf5', './api_results')

Validating buffer and sample regions
++++++++++++++++++++++++++++++++++++

You can validate whatever buffer or sample region you want to set. Note that
this validation is done as part of the ``find_buffer_range`` and
``find_sample_range`` functions, so there's no need to do it on regions found
with those functions.

.. code-block:: python

    import bioxtasraw.RAWAPI as raw

    #Load series
    xyl_series = raw.load_series(['./series_data/xylanase.hdf5'])[0]

    #Validate buffer region
    buffer_range = [[180, 240], [500, 560]]

    (valid, similarity_results, svd_results,
        intI_results) = raw.validate_buffer_range(xyl_series, buffer_range)

    #Fall back to an automatically determined region if the manual one fails
    if not valid:
        success, start, end = raw.find_buffer_range(xyl_series)
        buffer_range = [[start, end]]

    #Use buffer_range here (not start/end), since start and end only exist
    #when the fallback above ran
    (sub_profiles, rg, rger, i0, i0er, vcmw, vcmwer,
        vpmw) = raw.set_buffer_range(xyl_series, buffer_range)

    #Validate sample region
    sample_range = [[785, 815]]

    (valid, similarity_results, param_results, svd_results,
        sn_results) = raw.validate_sample_range(xyl_series, sample_range)

    #Fall back to an automatically determined region if the manual one fails
    if not valid:
        success, start_idx, end_idx = raw.find_sample_range(xyl_series)
        sample_range = [[start_idx, end_idx]]

    sub_profile = raw.set_sample_range(xyl_series, sample_range)

Validating baseline regions
+++++++++++++++++++++++++++

You can validate baseline regions. Note that this validation is done as part
of ``find_baseline_range`` for an integral baseline, so it is not necessary in
that case. Also, the linear baseline validation is not terribly useful at the
moment; it almost always returns invalid.

.. code-block:: python

    import bioxtasraw.RAWAPI as raw

    #Load series
    series = raw.load_series(['./series_data/baseline.hdf5'])[0]

    #Validate linear baseline range
    (valid, valid_results, similarity_results, svd_results, intI_results,
        other_results) = raw.validate_baseline_range(
        series, [0, 10], [953, 963], 'Linear')

    #Validate integral baseline range
    (valid, valid_results, similarity_results, svd_results, intI_results,
        other_results) = raw.validate_baseline_range(series, [539, 568],
        [817, 846])

Carrying out SVD, EFA, and REGALS
+++++++++++++++++++++++++++++++++

You can carry out SVD, EFA, and REGALS from the API (though without the GUI
you have to know what the appropriate ranges are for each EFA component as
input).

.. code-block:: python

    import bioxtasraw.RAWAPI as raw

    # Load data
    phehc_series = raw.load_series(['./series_data/phehc_sec.hdf5'])[0]

    #Do SVD
    svd_s, svd_U, svd_V = raw.svd(phehc_series)

    #Do EFA
    efa_ranges = [[149, 197], [164, 321], [320, 364]]

    (efa_profiles, efa_converged, efa_conv_data,
        efa_rotation_data) = raw.efa(phehc_series, efa_ranges)

    # Do REGALS
    prof1_settings = {
        'type': 'simple',
        'lambda': 0.0,
        'auto_lambda': True,
        'kwargs': {},
        }

    conc1_settings = {
        'type': 'smooth',
        'lambda': 1.0,
        'auto_lambda': True,
        'kwargs': {
            'xmin': 145,
            'xmax': 195,
            'Nw': 50,
            'is_zero_at_xmin': True,
            'is_zero_at_xmax': True,
            }
        }

    prof2_settings = {
        'type': 'simple',
        'lambda': 0.0,
        'auto_lambda': True,
        'kwargs': {},
        }

    conc2_settings = {
        'type': 'smooth',
        'lambda': 3.0e3,
        'auto_lambda': False,
        'kwargs': {
            'xmin': 160,
            'xmax': 325,
            'Nw': 50,
            'is_zero_at_xmin': True,
            'is_zero_at_xmax': True,
            }
        }

    prof3_settings = {
        'type': 'simple',
        'lambda': 0.0,
        'auto_lambda': True,
        'kwargs': {},
        }

    conc3_settings = {
        'type': 'smooth',
        'lambda': 1.0,
        'auto_lambda': True,
        'kwargs': {
            'xmin': 320,
            'xmax': 383,
            'Nw': 50,
            'is_zero_at_xmin': True,
            'is_zero_at_xmax': True,
            }
        }

    comp_settings = [(prof1_settings, conc1_settings),
        (prof2_settings, conc2_settings),
        (prof3_settings, conc3_settings)]

    (regals_profiles, regals_ifts, concs, reg_concs, mixture, params,
        residual) = raw.regals(phehc_series, comp_settings)