问题
I have this txt file:
BLOCK_START_DATASET
dlcdata L:\loads\confidential\000_Loads_Analysis_Environment\Tools\releases\01_Preprocessor\Version_3.0\Parameterfiles\Bladed4.2\DLC-Files\DLCDataFile.txt
simulationdata L:\loads\confidential\000_Loads_Analysis_Environment\Tools\releases\01_Preprocessor\Version_3.0\Parameterfiles\Bladed4.2\DLC-Files\BladedFile.txt
outputfolder Pfadangabe\runs_test
windfolder L:\loads2\WEC\1002_50-2\_calc\50-2_D135_HH95_RB-AB66-0O_GL2005_towerdesign_Bladed_v4-2_revA01\_wind
referenzfile_servesea L:\loads\confidential\000_Loads_Analysis_Environment\Tools\releases\01_Preprocessor\Version_3.0\Dataset_to_start\Referencefiles\Bladed4.2\DLC\dlc1-1_04a1.$PJ
referenzfile_generalsea L:\loads\confidential\000_Loads_Analysis_Environment\Tools\releases\01_Preprocessor\Version_3.0\Dataset_to_start\Referencefiles\Bladed4.2\DLC\dlc6-1_000_a_50a_022.$PJ
externalcontrollerdll L:\loads\confidential\000_Loads_Analysis_Environment\Tools\releases\01_Preprocessor\Version_3.0\Dataset_to_start\external_Controller\DisCon_V3_2_22.dll
externalcontrollerparameter L:\loads\confidential\000_Loads_Analysis_Environment\Tools\releases\01_Preprocessor\Version_3.0\Dataset_to_start\external_Controller\ext_Ctrl_Data_V3_2_22.txt
BLOCK_END_DATASET
% ------------------------------------
BLOCK_START_WAVE
% a6*x^6 + a5*x^5 + a4*x^4 + a3*x^3 + a2*x^2 + a1*x + a0
factor_hs 0.008105;0.029055;0.153752
factor_tz -0.029956;1.050777;2.731063
factor_tp -0.118161;1.809956;3.452903
spectrum_gamma 3.3
BLOCK_END_WAVE
% ------------------------------------
BLOCK_START_EXTREMEWAVE
height_hs1 7.9
period_hs1 11.8
height_hs50 10.8
period_hs50 13.8
height_hred1 10.43
period_hred1 9.9
height_hred50 14.26
period_hred50 11.60
height_hmax1 14.8
period_hmax1 9.9
height_hmax50 20.1
period_hmax50 11.60
BLOCK_END_EXTREMEWAVE
% ------------------------------------
BLOCK_START_TIDE
normal 0.85
yr1 1.7
yr50 2.4
BLOCK_END_TIDE
% ------------------------------------
BLOCK_START_CURRENT
velocity_normal 1.09
velocity_yr1 1.09
velocity_yr50 1.38
BLOCK_END_CURRENT
% ------------------------------------
BLOCK_START_EXTREMEWIND
velocity_v1 29.7
velocity_v50 44.8
velocity_vred1 32.67
velocity_vred50 49.28
velocity_ve1 37.9
velocity_ve50 57
velocity_Vref 50
BLOCK_END_EXTREMEWIND
% ------------------------------------
Currently I'm parsing it this way:
% Script: reads 'test_struct.txt' and creates one struct variable per
% BLOCK_START_<NAME> ... BLOCK_END_<NAME> section. Every value is kept as
% the string that textscan returned (no numeric conversion is done here).
clc, clear all, close all
%Find all row headers
% First pass over the file: keep the first token of each line (%s) and
% skip the remainder of the line (%*[^\n]). Text after '%' is treated as
% a comment because of 'CommentStyle','%'.
fid = fopen('test_struct.txt','r');
row_headers = textscan(fid,'%s %*[^\n]','CommentStyle','%','CollectOutput',1);
row_headers = row_headers{1};
fclose(fid);
%Find all attributes
% Second pass over the same file: skip the first token (%*s) and keep the
% second one (%s).
fid1 = fopen('test_struct.txt','r');
attributes = textscan(fid1,'%*s %s','CommentStyle','%','CollectOutput',1);
attributes = attributes{1};
fclose(fid1);
%Collect row headers and attributes in a single cell
% NOTE(review): the horizontal concatenation only succeeds when both
% passes returned the same number of rows - confirm for lines that hold a
% single token (e.g. the BLOCK_START_/BLOCK_END_ marker lines).
parameters = [row_headers,attributes];
%Find all the blocks
% regexp is applied to the whole two-column cell, so find() returns linear
% indices. They are used as row indices below.
% NOTE(review): this presumes the markers only ever occur in column 1.
startIdx = find(~cellfun(@isempty, regexp(parameters, 'BLOCK_START_', 'match')));
endIdx = find(~cellfun(@isempty, regexp(parameters, 'BLOCK_END_', 'match')));
% Only the number of start/end markers is compared, not their ordering.
assert(all(size(startIdx) == size(endIdx)))
%Extract fields between BLOCK_START_ and BLOCK_END_
% Rows strictly between the two markers, column 1 (names).
extract_fields = @(n)(parameters(startIdx(n)+1:endIdx(n)-1,1));
struct_fields = arrayfun(extract_fields, 1:numel(startIdx), 'UniformOutput', false);
%Extract attributes between BLOCK_START_ and BLOCK_END_
% Same rows, column 2 (values, still strings).
extract_attributes = @(n)(parameters(startIdx(n)+1:endIdx(n)-1,2));
struct_attributes = arrayfun(extract_attributes, 1:numel(startIdx), 'UniformOutput', false);
%Get structure names stored after each BLOCK_START_
% Strip the 'BLOCK_START_' prefix; genvarname turns the results into
% variable names.
structures_name = @(n) strrep(parameters{startIdx(n)},'BLOCK_START_','');
structure_names = genvarname(arrayfun(structures_name,1:numel(startIdx),'UniformOutput',false));
%Generate structures
% eval creates one workspace variable per block, named after the block,
% whose fields are the names from column 1 and whose values are the
% strings from column 2.
for i=1:numel(structure_names)
eval([structure_names{i} '=cell2struct(struct_attributes{i},struct_fields{i},1);'])
end
It works, but not the way I want. The overall idea is to read the file into a single structure, with one field per BLOCK_START / BLOCK_END block. Furthermore, I would like the numbers to be read as double rather than char, and delimiters such as whitespace, "," or ";" should be treated as array separators (e.g. 3;4;5 becomes [3;4;5], and similarly for the other delimiters).
To clarify better, I will take the block
BLOCK_START_WAVE
% a6*x^6 + a5*x^5 + a4*x^4 + a3*x^3 + a2*x^2 + a1*x + a0
factor_hs 0.008105;0.029055;0.153752
factor_tz -0.029956;1.050777;2.731063
factor_tp -0.118161;1.809956;3.452903
spectrum_gamma 3.3
BLOCK_END_WAVE
The structure will be called WAVE with
WAVE.factor_hs = [0.008105;0.029055;0.153752]
WAVE.factor_tz = [-0.029956;1.050777;2.731063]
WAVE.factor_tp = [-0.118161;1.809956;3.452903]
WAVE.spectrum_gamma = 3.3
Any suggestion will be strongly appreciated.
Best regards.
回答1:
You have answers to this question (which is also yours) as a good starting point! To extract everything into a cell array, you do:
%# Load every non-comment line of the input file as one string per line
fid = fopen('test_struct.txt', 'rt');
C = textscan(fid, '%s', 'Delimiter', '\r\n', 'CommentStyle', '%');
fclose(fid);
%# Line numbers of the block opening and closing markers
start_idx = find(cellfun(@(hit) ~isempty(hit), strfind(C{1}, 'BLOCK_START')));
end_idx = find(cellfun(@(hit) ~isempty(hit), strfind(C{1}, 'BLOCK_END')));
%# There must be as many closing markers as opening markers
assert(isequal(size(start_idx), size(end_idx)))
%# Slice out each block as a row cell: the BLOCK_START line is included,
%# the BLOCK_END line is not
extract_block = @(n)({C{1}{start_idx(n):end_idx(n) - 1}});
cell_blocks = arrayfun(extract_block, 1:numel(start_idx), 'Uniform', false);
Now, to translate that into corresponding structs, do this:
%# Convert every block in cell_blocks into one field of a single struct.
%#
%# Input : cell_blocks - cell array; each element is a cell array of strings
%#         whose first entry is the 'BLOCK_START_<NAME>' marker line and
%#         whose remaining entries are '<field> <value>' lines.
%# Output: data - struct with one field per block (data.<NAME>), each of
%#         which is a struct with one field per parameter line.
%#         Values made up only of numbers separated by whitespace, ',' or
%#         ';' become double column vectors (e.g. '3;4;5' -> [3;4;5]);
%#         anything else (paths, file names) is kept as the original string.
%#
%# Block and field names are used as dynamic field names, so they have to
%# be valid MATLAB identifiers.
data = struct();
for i = 1:length(cell_blocks)
    %# Extract the block
    C = strtrim(cell_blocks{i});
    C(cellfun(@isempty, C)) = []; %# Ignore empty lines
    if isempty(C)
        continue %# Nothing left in this block, not even the marker line
    end
    %# The first line is the marker; the struct name is what follows the prefix
    name = strrep(C{1}, 'BLOCK_START_', '');
    block = struct();
    for k = 2:numel(C)
        %# First whitespace-delimited token is the field name, the rest is
        %# the value (kept whole so values containing spaces survive)
        [field, rest] = strtok(C{k});
        rest = strtrim(rest);
        %# Split on whitespace, ',' or ';' and attempt a numeric conversion
        tokens = regexp(rest, '[\s,;]+', 'split');
        tokens(cellfun(@isempty, tokens)) = [];
        nums = str2double(tokens);
        if ~isempty(nums) && ~any(isnan(nums))
            block.(field) = nums(:); %# All tokens numeric: column vector
        else
            block.(field) = rest;    %# Not purely numeric: keep the string
        end
    end
    %# Dynamic field name instead of eval: one field per block
    data.(name) = block;
end
回答2:
Well, I've never used MATLAB, but you could use the following regex to find a block:
/BLOCK_START_(\w+).*?BLOCK_END_\1/s
Then for each block, find all the attributes:
/^(?!BLOCK_END_)(\w+)\s+((?:-?\d+\.?\d*)(?:;(?:-?\d+\.?\d*))*)/m
Then, based on the presence of semicolons in the second sub-match, you could assign it as either a single-value or a multiple-value variable. I'm not sure how to translate that into MATLAB, but I hope this helps!
来源:https://stackoverflow.com/questions/12352737/parsing-text-file-in-matlab