Compare commits

...

10 Commits

Author SHA1 Message Date
Misha Vicha
f9777b124b Hashmap for file:signals pairs, signal_data file 2025-10-13 08:20:42 +02:00
Misha Vicha
81f768a12a Updates launch json with test file argument 2025-10-13 08:19:05 +02:00
Misha Vicha
cf57876e67 SignalData parsing pt.1 - tb squashed 2025-10-01 14:24:25 +02:00
Misha Vicha
2becccd064 Improved naming of Header constructor
`from_record` is now called `with_record` as the library standard
suggests
2025-10-01 14:04:45 +02:00
Misha Vicha
1c56fdb5bd Improves the handling of Header::from_record
The correct vector capacity is now automatically initialized
2025-10-01 10:33:00 +02:00
Misha Vicha
136fe97fb7 Adds autodoc comments to headparse 2025-10-01 10:29:34 +02:00
Misha Vicha
5735a3b0b5 Renames headproc to more descriptive headparse 2025-10-01 10:12:08 +02:00
Misha Vicha
4982aaa59f Implements all the other signal parsing blocks. 2025-10-01 10:06:10 +02:00
Misha Vicha
9abc4399f5 Adds parsing of the remaining Record arguments 2025-09-30 16:53:19 +02:00
Misha Vicha
8dc16c4210 Adds header parsing with basic arguments 2025-09-30 13:23:06 +02:00
4 changed files with 671 additions and 2 deletions

47
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,47 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "lldb",
"request": "launch",
"name": "Debug executable 'wfdb_corrosion'",
"cargo": {
"args": [
"build",
"--bin=wfdb_corrosion",
"--package=wfdb_corrosion"
],
"filter": {
"name": "wfdb_corrosion",
"kind": "bin"
}
},
"args": [
"./test_data/s1_high_resistance_bike.hea"
],
"cwd": "${workspaceFolder}"
},
{
"type": "lldb",
"request": "launch",
"name": "Debug unit tests in executable 'wfdb_corrosion'",
"cargo": {
"args": [
"test",
"--no-run",
"--bin=wfdb_corrosion",
"--package=wfdb_corrosion"
],
"filter": {
"name": "wfdb_corrosion",
"kind": "bin"
}
},
"args": [],
"cwd": "${workspaceFolder}"
}
]
}

489
src/headparse.rs Normal file
View File

@@ -0,0 +1,489 @@
use std::{collections::HashMap, hash::Hash, u64, vec};
use crate::SignalFormat;
/// Holds the essential header information about the record, like the amount of
/// signals, and the sampling frequency.
///
/// The fields mirror the record line of a WFDB header file, which is laid out as
/// `name[/segments] signals [freq[/counter_freq[(base_counter)]] [samples [time [date]]]]`.
/// Only the record name and the signal count have to be present on that line.
#[derive(Debug, Clone)]
struct Record {
    /// Record name, without the optional `/segments` suffix.
    name: String,
    /// Number of segments, taken from the `/segments` suffix of the name field.
    seg_num: Option<u64>,
    /// Number of signals the header declares.
    signal_count: usize,
    /// Samples per second per signal; 250 when the record line omits it.
    sampling_freq: f64,
    /// Counter ticks per second, taken from the `/counter_freq` suffix.
    counter_freq: Option<f64>,
    /// Base counter value, taken from the parenthesised suffix of the counter
    /// frequency.
    base_counter_val: Option<f64>,
    /// Number of samples per signal.
    sample_num: Option<u64>,
    /// Base time, stored verbatim; nothing in this module interprets it yet.
    basetime: Option<String>,
    /// Base date, stored verbatim; nothing in this module interprets it yet.
    basedate: Option<String>,
}

impl Record {
    /// Sampling frequency assumed when the record line does not provide one.
    const DEFAULT_SAMPLING_FREQ: f64 = 250.0;

    /// Attempts to generate the record information from a string of the argument
    /// line that's provided by the WFDB header file.
    ///
    /// Fields may be separated by any amount of spaces or tabs, and a trailing
    /// carriage return is ignored. Anything after the base date is ignored.
    ///
    /// # Errors
    ///
    /// Returns a message describing the failure when fewer than two fields are
    /// present, or when a numeric field that is present cannot be parsed. A
    /// sampling frequency that is not a positive, finite number is rejected too.
    pub fn from_str(argument_line: &str) -> Result<Record, &str> {
        let mut fields = argument_line.split_whitespace();

        let (name_field, count_field) = match (fields.next(), fields.next()) {
            (Some(name), Some(count)) => (name, count),
            _ => return Err("error: header file provided lacks sufficient record arguments."),
        };

        // The segment count is glued to the record name as `name/segments`.
        let (name, seg_num) = match name_field.split_once('/') {
            Some((name, segments)) => {
                let segments = segments
                    .parse::<u64>()
                    .map_err(|_| "error: failed to parse segment number from header.")?;
                (name, Some(segments))
            }
            None => (name_field, None),
        };

        let signal_count = count_field
            .parse::<usize>()
            .map_err(|_| "error: failed to parse signal number from header.")?;

        // A single field carries `freq[/counter_freq[(base_counter)]]`.
        let mut sampling_freq = Record::DEFAULT_SAMPLING_FREQ;
        let mut counter_freq: Option<f64> = None;
        let mut base_counter_val: Option<f64> = None;
        if let Some(freq_field) = fields.next() {
            let (freq_text, counter_text) = match freq_field.split_once('/') {
                Some((freq, counter)) => (freq, Some(counter)),
                None => (freq_field, None),
            };
            sampling_freq = match Record::parse_finite(freq_text) {
                Some(freq) if freq > 0.0 => freq,
                _ => return Err("error: failed to parse sampling frequency from header."),
            };

            if let Some(counter_text) = counter_text {
                let (ticks_text, base_text) = match counter_text.split_once('(') {
                    Some((ticks, rest)) => match rest.strip_suffix(')') {
                        Some(base) => (ticks, Some(base)),
                        None => {
                            return Err("error: failed to parse base counter value from header.")
                        }
                    },
                    None => (counter_text, None),
                };
                counter_freq = Some(
                    Record::parse_finite(ticks_text)
                        .ok_or("error: failed to parse counter frequency from header.")?,
                );
                if let Some(base_text) = base_text {
                    base_counter_val = Some(
                        Record::parse_finite(base_text)
                            .ok_or("error: failed to parse base counter value from header.")?,
                    );
                }
            }
        }

        let sample_num = match fields.next() {
            Some(text) => Some(
                text.parse::<u64>()
                    .map_err(|_| "error: failed to parse number of samples from header.")?,
            ),
            None => None,
        };
        let basetime = fields.next().map(String::from);
        let basedate = fields.next().map(String::from);

        Ok(Record {
            name: name.to_string(),
            seg_num,
            signal_count,
            sampling_freq,
            counter_freq,
            base_counter_val,
            sample_num,
            basetime,
            basedate,
        })
    }

    /// Parses a floating point field, rejecting `NaN` and infinities which
    /// `f64::from_str` would otherwise accept.
    fn parse_finite(text: &str) -> Option<f64> {
        text.parse::<f64>().ok().filter(|value| value.is_finite())
    }
}
/// Names the individual pieces of text that can be extracted from the ADC
/// block of a signal specification line.
///
/// Used as the key type of the map that collects those pieces before they are
/// converted into typed values.
#[derive(Debug, PartialEq, Eq, Hash)]
enum AdcBlockKeys {
    /// The leading gain value of the block.
    Gain,
    /// The value enclosed in parentheses.
    Baseline,
    /// The text following the `/` separator.
    Units,
}
/// Holds the information relevant to individual signals and their specifications
///
/// Mirrors one signal specification line of a WFDB header file:
/// `file format[xN][:skew][+offset] [gain[(baseline)][/units] [resolution
/// [zero [initial [checksum [blocksize [description]]]]]]]`.
///
/// By minimum, only carries the `filename`, `format`, and `adc_zero` attributes
#[derive(Debug, Clone)]
pub struct SignalSpec {
    filename: String,
    format: SignalFormat,
    /// Value following `x` in the format field.
    samples_frame: Option<u64>,
    /// Value following `:` in the format field.
    skew: Option<u64>,
    /// Value following `+` in the format field.
    offset: Option<u64>,
    adc_gain: Option<f64>,
    baseline: Option<i64>,
    units: Option<String>,
    adc_resolution: Option<u64>,
    /// Zero when the field is absent or cannot be parsed.
    adc_zero: i64,
    initial_val: Option<i64>,
    checksum: Option<i64>,
    blocksize: Option<u64>,
    /// Free text after the block size; may contain spaces.
    desc: Option<String>,
}

impl SignalSpec {
    /// Attempts to generate a valid signal specification struct from a supplied
    /// string.
    ///
    /// Fields may be separated by any amount of spaces or tabs, and a trailing
    /// carriage return is ignored. Every field after the format is optional,
    /// but a field can only be present if all fields before it are.
    ///
    /// ## Arguments
    /// - `argument_line` - argument line for the signal specification, taken
    ///   from header file
    ///
    /// ## Errors
    /// Returns a message describing the failure when fewer than two fields are
    /// present or when the format, ADC block, resolution, checksum or block
    /// size cannot be parsed. An unparsable ADC zero falls back to `0` and an
    /// unparsable initial value is left unset, as before.
    pub fn from_str(argument_line: &str) -> Result<SignalSpec, &str> {
        let args: Vec<&str> = argument_line.split_whitespace().collect();
        if args.len() < 2 {
            return Err("error: signal provided by header file lacks sufficient arguments.");
        }

        let (format, samples_frame, skew, offset) = SignalSpec::parse_format_block(args[1])?;

        let mut adc_gain = None;
        let mut baseline = None;
        let mut units = None;
        if let Some(adc_block) = args.get(2).copied() {
            let parts = SignalSpec::split_adc_block(adc_block)?;
            if let Some(text) = parts.get(&AdcBlockKeys::Gain) {
                adc_gain = Some(
                    text.parse::<f64>()
                        .map_err(|_| "error: Signal contains adc gain block with invalid entry")?,
                );
            }
            if let Some(text) = parts.get(&AdcBlockKeys::Baseline) {
                baseline = Some(
                    text.parse::<i64>()
                        .map_err(|_| "error: Unable to parse baseline from signal")?,
                );
            }
            units = parts.get(&AdcBlockKeys::Units).cloned();
        }

        let adc_resolution = SignalSpec::parse_optional::<u64>(
            args.get(3).copied(),
            "error: unable to parse resolution from signal",
        )?;
        // Standard: If this field is missing, a value of zero is assumed.
        let adc_zero = args
            .get(4)
            .and_then(|text| text.parse::<i64>().ok())
            .unwrap_or(0);
        // Standard: If this field is missing, the adc_zero value is presumed.
        // That substitution happens in the data parsing part, not here.
        let initial_val = args.get(5).and_then(|text| text.parse::<i64>().ok());
        let checksum = SignalSpec::parse_optional::<i64>(
            args.get(6).copied(),
            "error: unable to parse checksum from signal",
        )?;
        let blocksize = SignalSpec::parse_optional::<u64>(
            args.get(7).copied(),
            "error: unable to parse block size from signal",
        )?;
        // The description runs to the end of the line and may contain spaces.
        let desc = if args.len() > 8 {
            Some(args[8..].join(" "))
        } else {
            None
        };

        Ok(SignalSpec {
            filename: args[0].to_string(),
            format,
            samples_frame,
            skew,
            offset,
            adc_gain,
            baseline,
            units,
            adc_resolution,
            adc_zero,
            initial_val,
            checksum,
            blocksize,
            desc,
        })
    }

    /// Parses the format field `format[xN][:skew][+offset]` into the signal
    /// format and its three optional modifiers (samples per frame, skew, byte
    /// offset), in that order.
    fn parse_format_block(
        block: &str,
    ) -> Result<(SignalFormat, Option<u64>, Option<u64>, Option<u64>), &'static str> {
        // The modifiers appear in a fixed order, so peel them off from the end.
        let (block, offset_text) = SignalSpec::split_modifier(block, '+');
        let (block, skew_text) = SignalSpec::split_modifier(block, ':');
        let (format_text, frame_text) = SignalSpec::split_modifier(block, 'x');

        let format = format_text
            .parse::<u64>()
            .map(SignalSpec::parse_format)
            .map_err(|_| "error: unable to parse format from signal")?;
        let samples_frame = SignalSpec::parse_optional::<u64>(
            frame_text,
            "error: unable to parse samples per frame from signal",
        )?;
        let skew = SignalSpec::parse_optional::<u64>(
            skew_text,
            "error: unable to parse skew from signal",
        )?;
        let offset = SignalSpec::parse_optional::<u64>(
            offset_text,
            "error: unable to parse byte offset from signal",
        )?;
        Ok((format, samples_frame, skew, offset))
    }

    /// Splits `block` at the first `marker`, returning the text before it and,
    /// when the marker is present, the text after it.
    fn split_modifier(block: &str, marker: char) -> (&str, Option<&str>) {
        match block.split_once(marker) {
            Some((head, tail)) => (head, Some(tail)),
            None => (block, None),
        }
    }

    /// Splits an ADC block of the shape `gain[(baseline)][/units]` into its
    /// raw textual parts.
    ///
    /// The gain entry is always present in the returned map (possibly as an
    /// empty string); baseline and units only when the block contains them.
    /// Fails when the baseline's opening parenthesis is never closed.
    fn split_adc_block(block: &str) -> Result<HashMap<AdcBlockKeys, String>, &'static str> {
        // Units are free text, so cut them off before looking for parentheses.
        let (numeric, units) = SignalSpec::split_modifier(block, '/');
        let (gain, baseline) = match numeric.split_once('(') {
            Some((gain, rest)) => match rest.strip_suffix(')') {
                Some(baseline) => (gain, Some(baseline)),
                None => return Err("error: Unable to parse baseline from signal"),
            },
            None => (numeric, None),
        };

        let mut parts = HashMap::new();
        parts.insert(AdcBlockKeys::Gain, gain.to_string());
        if let Some(baseline) = baseline {
            parts.insert(AdcBlockKeys::Baseline, baseline.to_string());
        }
        if let Some(units) = units {
            parts.insert(AdcBlockKeys::Units, units.to_string());
        }
        Ok(parts)
    }

    /// Parses `field` when it is present, mapping any parse failure to `error`.
    fn parse_optional<T: std::str::FromStr>(
        field: Option<&str>,
        error: &'static str,
    ) -> Result<Option<T>, &'static str> {
        match field {
            Some(text) => text.parse::<T>().map(Some).map_err(|_| error),
            None => Ok(None),
        }
    }

    /// Parses the WFDB signal format from the format number, if it exists and
    /// is implemented
    fn parse_format(formatnum: u64) -> SignalFormat {
        match formatnum {
            16 => SignalFormat::Format16,
            212 => SignalFormat::Format212,
            _ => SignalFormat::Unimpl,
        }
    }
}
/// Everything that was read from a WFDB header file: the record line and the
/// signal specification lines that follow it.
#[derive(Debug, Clone)]
pub struct Header {
    /// `None` only for the placeholder produced by `Header::new`.
    record: Option<Record>,
    signal_specs: Vec<SignalSpec>,
}

impl Header {
    /// Builds a placeholder header that has no record and no signals.
    ///
    /// `parse_header()` needs a value to hold before the record line has been
    /// seen; `is_empty` reports `true` for such a placeholder.
    fn new() -> Header {
        Header {
            record: None,
            signal_specs: Vec::new(),
        }
    }

    /// Builds a header around `record`.
    ///
    /// The signal list starts out empty but with room reserved for as many
    /// entries as the record's `signal_count` announces.
    fn with_record(record: Record) -> Header {
        Header {
            signal_specs: Vec::with_capacity(record.signal_count),
            record: Some(record),
        }
    }

    /// Appends one signal specification to the header.
    fn add_signal_spec(&mut self, spec: SignalSpec) {
        self.signal_specs.push(spec);
    }

    /// Returns `true` when the header carries no record.
    pub fn is_empty(&self) -> bool {
        self.record.is_none()
    }

    /// Counts, for every file name, how many signal specifications refer to it.
    pub fn signals_per_file(&self) -> HashMap<String, u64> {
        let mut counts = HashMap::new();
        for spec in self.signal_specs.iter() {
            *counts.entry(spec.filename.clone()).or_insert(0) += 1;
        }
        counts
    }
}
/// Attempts to parse the header file.
///
/// The first line that is neither blank nor a `#` comment is parsed as the
/// record line. The following non-blank, non-comment lines are parsed as signal
/// specifications until as many have been read as the record line declares;
/// anything after that is ignored. Both `\n` and `\r\n` line endings are
/// accepted, and surrounding whitespace on a line is ignored.
///
/// Returns a Result either containing the `Header` struct, or the
/// error string describing why we couldn't parse the header. Input that
/// contains no record line at all yields an error. A header with fewer signal
/// lines than declared is still returned as-is.
pub fn parse_header(header_data: &str) -> Result<Header, &str> {
    let mut header = Header::new();
    let mut found_record = false;
    let mut specs_read: usize = 0;
    let mut specs_max: usize = 0;

    for raw_line in header_data.lines() {
        let line = raw_line.trim();
        // Blank and commented lines carry no header fields
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        if !found_record {
            let record = Record::from_str(line)?;
            specs_max = record.signal_count;
            header = Header::with_record(record);
            found_record = true;
            continue;
        }
        // Checked before parsing so that a record declaring zero signals never
        // tries to interpret a following line as a signal specification.
        if specs_read >= specs_max {
            break;
        }
        header.add_signal_spec(SignalSpec::from_str(line)?);
        specs_read += 1;
    }

    if header.is_empty() {
        return Err("Unable to parse valid header information");
    }
    Ok(header)
}

View File

@@ -1,3 +1,88 @@
fn main() {
println!("Hello, world!");
use std::{env::{self}, path::Path};
use std::fs;
pub mod headparse; // The HEAder parsing
pub mod signal_data;
use crate::{headparse::Header, signal_data::SignalData};
/// Storage formats of WFDB signal data that the tool tells apart.
///
/// The discriminant of each implemented variant equals its WFDB format number.
#[derive(Clone, Copy, Debug)]
enum SignalFormat {
    /// Format number 16.
    Format16 = 16,
    /// Format number 212.
    Format212 = 212,
    /// Stand-in for a format that is not implemented.
    Unimpl = 0,
}
/// Entry point: validates the command line, parses the given `.hea` header
/// file and hands the result to `get_all_data`.
///
/// Prints the usage text when no argument is given or when any argument equals
/// `help`. A path that is not an existing file, or that does not end in
/// `.hea`, is reported on standard output and the program exits successfully.
///
/// # Errors
///
/// Returns an error message when the header file cannot be read or cannot be
/// parsed.
fn main() -> Result<(), String> {
    let args: Vec<String> = env::args().collect();
    if args.len() <= 1 || args.iter().any(|arg| arg == "help") {
        help();
        return Ok(());
    }

    let filepath = Path::new(&args[1]);
    if !filepath.is_file() {
        println!("Path argument provided is not a valid file");
        return Ok(());
    }
    // `extension()` is `None` for names without a dot; treat that as "not a
    // .hea file" instead of panicking.
    if filepath.extension().map_or(true, |ext| ext != "hea") {
        println!("File provided is not a .hea file");
        return Ok(());
    }

    // Parse the header information
    let file_data = fs::read_to_string(filepath)
        .map_err(|_| "error: Provided file cannot be opened".to_string())?;
    let header: Header =
        headparse::parse_header(file_data.as_str()).map_err(|e| e.to_string())?;

    // Directory of the header file; empty when there is no parent or when the
    // parent is not valid UTF-8.
    let path_parent: String = filepath
        .parent()
        .and_then(|parent| parent.to_str())
        .unwrap_or("")
        .to_string();

    let _parsed_signals = get_all_data(header, path_parent);
    println!("Hello, world!");
    Ok(())
}
/// Prints a short description of the tool and its invocation syntax to
/// standard output.
fn help() {
    let usage_lines = [
        "Conversion of WFDB files to a more human readable format. By default to a CSV.",
        "\nUse in the format \"wfdb_corrosion (.hea filename)\"",
    ];
    for text in usage_lines.iter() {
        println!("{}", text);
    }
}
/// Not implemented yet: calling this function panics through `todo!()`.
///
/// NOTE(review): judging by the signature, this is meant to produce the
/// `SignalData` of one signal described by `spec`, with `offset` and
/// `data_length` selecting the part of the data to read; confirm once the
/// body exists.
fn get_signal_data(spec: headparse::SignalSpec, offset: u64, data_length: usize) -> SignalData {
todo!()
}
/// Not implemented yet: after walking the per-file signal counts this function
/// panics through `todo!()`, so it never returns a value.
///
/// NOTE(review): `header_root` is presumably the directory the signal files
/// are resolved against; it is not used yet, so confirm when implementing.
fn get_all_data(header: Header, header_root: String) -> Vec<SignalData> {
// Maps every signal file name to the number of signals stored in it
let signal_counts = header.signals_per_file();
// The per-file processing is still missing, the loop body is empty
for (filename, sigcount) in signal_counts {
}
todo!()
}

48
src/signal_data.rs Normal file
View File

@@ -0,0 +1,48 @@
/// Holds fully processed signal data already parsed from the .dat file, ready
/// to be outputted in a practical way
#[derive(Debug, Clone)]
pub struct SignalData {
    name: String,
    values: Vec<f64>,
    unit: String,
}

impl SignalData {
    /// Creates a [`SignalData`] with a name and an empty `values` vector and
    /// `unit` string.
    ///
    /// The `values` vector starts without any reserved capacity, so it has to
    /// reallocate while it grows. Prefer
    /// [`SignalData::with_name_and_capacity`] when the number of samples is
    /// known up front.
    ///
    /// # Arguments
    /// * `name` - The name of the Signal
    pub fn with_name(name: String) -> SignalData {
        SignalData {
            name,
            values: Vec::new(),
            unit: String::new(),
        }
    }

    /// Creates a [`SignalData`] with a name and a `values` vector of a certain
    /// capacity and an empty `unit` string.
    ///
    /// # Arguments
    ///
    /// * `name` - The name of the Signal
    /// * `capacity` - The capacity with which the `values` vector will be
    ///   initialized
    pub fn with_name_and_capacity(name: String, capacity: usize) -> SignalData {
        SignalData {
            name,
            values: Vec::with_capacity(capacity),
            unit: String::new(),
        }
    }

    /// Appends a value to `values` inside the struct
    pub fn push(&mut self, value: f64) {
        self.values.push(value);
    }

    /// Appends all samples from `data` to the end of the `values` vector
    /// inside the struct, keeping the samples that are already stored.
    ///
    /// `data` is taken by value and consumed.
    pub fn append_copy(&mut self, data: Vec<f64>) {
        // The stored vector must be the receiver: appending it INTO `data`
        // would empty it and drop every sample together with `data`.
        self.values.extend(data);
    }
}