1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
// storage.rs
//
// Copyright (c) 2020 All The Music, LLC
//
// This work is licensed under the Creative Commons Attribution 4.0 International License.
// To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/ or send
// a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

pub(crate) mod tar_archive;
/// Batch archive storage backend
pub mod batch_tar_file;
/// Tar archive storage backend
pub mod tar_file;
/// Gzip-compressed Tar archive storage backend
pub mod tar_gz_file;

pub use tar_archive::*;
pub use batch_tar_file::BatchTarFile;
pub use tar_file::TarFile;
pub use tar_gz_file::TarGzFile;

/*****************
***** Traits *****
*****************/

/// Common interface for storage backends that persist MIDI files built from
/// [libatm::MIDIFile](../../libatm/midi_file/struct.MIDIFile.html).
pub trait StorageBackend : Sized {
    /// Error reported by the storage operations of this backend
    type Error: std::fmt::Debug;

    /// Append a single MIDI file to the storage backend.
    ///
    /// `mode` is handed to the implementation untouched; its meaning is defined
    /// by the backend (presumably the file mode of the stored entry -- confirm
    /// against the concrete implementations).
    fn append_file(&mut self, mfile: libatm::MIDIFile, mode: Option<u32>) -> Result<(), Self::Error>;

    /// Wrap `melody` in a MIDI file and append it to the storage backend.
    ///
    /// The file is constructed with `libatm::MIDIFormat::Format0` and `1` for both
    /// remaining constructor arguments, then forwarded to
    /// [append_file](#tymethod.append_file) together with `mode`.
    fn append_melody(&mut self, melody: libatm::MIDINoteVec, mode: Option<u32>) -> Result<(), Self::Error> {
        self.append_file(
            libatm::MIDIFile::new(melody, libatm::MIDIFormat::Format0, 1, 1),
            mode,
        )
    }

    /// Clean up the storage backend and close it for writing.
    ///
    /// NOTE: Some backends may implement this as a NOOP, but callers should always
    /// invoke it once the last MIDI file has been written.
    fn finish(&mut self) -> Result<(), Self::Error>;
}

/// Trait to implement functionality for storage backends to expose the underlying
/// (inner) storage object.
pub trait IntoInner : StorageBackend {
    /// Type of inner object
    type Inner;

    /// Finish writing storage backends and return the inner object
    fn into_inner(self) -> Result<Self::Inner, <Self as StorageBackend>::Error>;
}

/// Error type returned by implementations of [PathGenerator](trait.PathGenerator.html).
///
/// Each variant wraps the error type of one concrete path generator, so callers of
/// [PathGenerator::gen_path_for_file](trait.PathGenerator.html#tymethod.gen_path_for_file)
/// deal with a single error type regardless of the generator in use.
#[derive(Debug, thiserror::Error)]
pub enum PathGeneratorError {
    /// Failure reported by [PartitionPathGenerator](struct.PartitionPathGenerator.html).
    ///
    /// The variant is `transparent`: the displayed message is the one of the wrapped
    /// [PartitionPathGeneratorError](enum.PartitionPathGeneratorError.html). The `#[from]`
    /// attribute additionally derives the `From` conversion used by `?` and `.into()`.
    #[error(transparent)]
    PartitionPathGenerator(#[from] PartitionPathGeneratorError),
}

/// Trait to generate the storage path of a MIDI file inside a storage backend.
///
/// Implementations in this module are
/// [MIDIHashPathGenerator](struct.MIDIHashPathGenerator.html) and
/// [PartitionPathGenerator](struct.PartitionPathGenerator.html).
pub trait PathGenerator {
    /// Generate the storage path for `mfile`.
    ///
    /// # Errors
    ///
    /// Returns a [PathGeneratorError](enum.PathGeneratorError.html) when the
    /// implementation cannot derive a path for the given file.
    fn gen_path_for_file(&self, mfile: &libatm::MIDIFile) -> Result<String, PathGeneratorError>;
}

/********************************
***** MIDIHashPathGenerator *****
********************************/

/// Path generator that produces the hash of a MIDI file as the filename
/// without any parent directories (see:
/// [MIDIFile::gen_hash](../../libatm/midi_file/struct.MIDIFile.html#method.gen_hash)).
/// This path generator is useful for smaller datasets.
///
/// The type carries no state, so it is `Copy` and can be created with
/// `MIDIHashPathGenerator` or `MIDIHashPathGenerator::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct MIDIHashPathGenerator;

impl PathGenerator for MIDIHashPathGenerator {
    /// Build the path `<hash>.mid`, where `<hash>` is the value returned by
    /// `mfile.gen_hash()`. No parent directories are added and this
    /// implementation never returns an error.
    fn gen_path_for_file(&self, mfile: &libatm::MIDIFile) -> Result<String, PathGeneratorError> {
        let hash = mfile.gen_hash();
        let filename = format!("{}.mid", hash);
        Ok(filename)
    }
}

/*********************************
***** PartitionPathGenerator *****
*********************************/

/// Error type for [PartitionPathGenerator](struct.PartitionPathGenerator.html)
#[derive(Debug, thiserror::Error)]
pub enum PartitionPathGeneratorError {
    /// A MIDI file was passed whose melody length differs from the length the
    /// generator was created for.
    ///
    /// `expected` is the configured melody length, `observed` the length of the
    /// melody in the offending file.
    #[error("Expected melody of length {expected}, found length {observed}")]
    MelodyLengthMismatch { expected: u32, observed: u32, },
    /// The requested partition depth is greater than the melody length.
    ///
    /// NOTE(review): the message says "less than", but the constructor only
    /// rejects a depth strictly greater than the melody length; a depth equal
    /// to the melody length is accepted.
    #[error("Partition depth must be less than the length of generated melodies \
            ({partition_depth} > {melody_length})")]
    PartitionDepthLongerThanMelody { partition_depth: u32, melody_length: u32, },
    /// The computed partitioning needs more notes than a melody contains, i.e.
    /// `partition_depth * partition_length` is greater than `melody_length`.
    #[error("Melodies of length {melody_length} cannot be partitioned with depth \
            {partition_depth} and length {partition_length}")]
    PartitionsLongerThanMelody { melody_length: u32, partition_depth: u32, partition_length: u32, },
}

/// Path generator for storage backends that support partitioned output schemes
///
/// Partitioning files by path in the output storage backend can be useful if not all files
/// can be written to the same directory/file. For example, most modern filesystems don't perform
/// as well with more than 4K files per directory. Partitioning files into subdirectories with a
/// depth (number of partition branches) and partition length (number of notes per partition) can
/// ensure no more than some threshold of files is written to a directory
/// (see: [gen_partition_length](struct.PartitionPathGenerator.html#method.gen_partition_length)).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartitionPathGenerator {
    /// Length of melodies to generate partitions for
    melody_length: u32,
    /// Partition depth (i.e., number of partitions to generate)
    partition_depth: u32,
    /// Number of MIDI notes per partition
    partition_length: u32,
}

impl PartitionPathGenerator {
    /// Generate partition length (number of MIDI notes per partition).
    ///
    /// With `num_notes` distinct notes, `partition_depth` directory levels of
    /// `partition_length` notes each spread the melodies over
    /// `num_notes ^ (partition_depth * partition_length)` leaf directories. The
    /// length returned is
    /// `ceil(log_{num_notes ^ partition_depth}(num_melodies / max_files))`,
    /// i.e. the smallest length for which the number of leaf directories reaches
    /// `num_melodies / max_files`.
    ///
    /// # Errors
    ///
    /// Returns `PartitionsLongerThanMelody` when `partition_depth * partition_length`
    /// is greater than `melody_length` (this includes products too large for `u32`,
    /// e.g. when `max_files` is 0).
    fn gen_partition_length(
        num_notes: u32,
        num_melodies: u64,
        melody_length: u32,
        max_files: u32,
        partition_depth: u32
    ) -> Result<u32, PartitionPathGeneratorError> {
        // Maximum number of partition branches (directories): quotient of the
        // number of generated melodies and the maximum number of files per directory
        let max_partitions = (num_melodies as f64) / f64::from(max_files);

        // Base of the logarithm. The exact integer power is used whenever it fits
        // in a u32; larger powers fall back to floating point instead of overflowing.
        let log_base = match num_notes.checked_pow(partition_depth) {
            Some(exact) => f64::from(exact),
            None => f64::from(num_notes).powf(f64::from(partition_depth)),
        };
        // The float-to-integer cast saturates, so an infinite or oversized result
        // becomes u32::MAX and is rejected by the length check below.
        let partition_length = max_partitions.log(log_base).ceil() as u32;

        // Ensure melody_length is at least as long as depth * length; a product
        // that does not even fit in a u32 is certainly too long
        let fits_in_melody = partition_depth
            .checked_mul(partition_length)
            .map_or(false, |total_notes| total_notes <= melody_length);
        if !fits_in_melody {
            return Err(PartitionPathGeneratorError::PartitionsLongerThanMelody {
                melody_length,
                partition_depth,
                partition_length,
            });
        }
        Ok(partition_length)
    }

    /// Create new `PartitionPathGenerator` instance
    ///
    /// * `num_notes` - number of distinct notes melodies are generated from
    /// * `melody_length` - number of notes in each melody
    /// * `max_files` - maximum number of files per directory
    /// * `partition_depth` - requested number of partition levels
    ///
    /// If `num_notes` is 1, or the total number of melodies does not exceed
    /// `max_files`, no partitioning is needed and the generator is created with a
    /// depth of 1 and a partition length of 0, which yields an empty basename.
    ///
    /// # Errors
    ///
    /// * `PartitionDepthLongerThanMelody` if `partition_depth` is greater than
    ///   `melody_length`
    /// * `PartitionsLongerThanMelody` if the computed partitions need more notes
    ///   than a melody contains
    pub fn new(
        num_notes: u32,
        melody_length: u32,
        max_files: u32,
        partition_depth: u32
    ) -> Result<Self, PartitionPathGeneratorError> {
        // Ensure partition depth does not exceed the length of generated melodies
        if partition_depth > melody_length {
            return Err(PartitionPathGeneratorError::PartitionDepthLongerThanMelody {
                partition_depth,
                melody_length,
            });
        }

        // Generate total number of melodies
        let num_melodies = crate::utils::gen_num_melodies(num_notes, melody_length);
        let (partition_depth, partition_length) =
            if num_notes == 1 || num_melodies <= max_files.into() {
                // Everything fits in a single directory
                (1, 0)
            } else {
                let partition_length = Self::gen_partition_length(
                    num_notes,
                    num_melodies,
                    melody_length,
                    max_files,
                    partition_depth,
                )?;
                (partition_depth, partition_length)
            };

        Ok(Self {
            melody_length,
            partition_depth,
            partition_length,
        })
    }

    /// Generate basename (parent directory/directories) for filepath
    ///
    /// The basename consists of `partition_depth` components joined with the OS
    /// path separator. Component `p` is the concatenation of the integer
    /// representations (`convert()`) of the notes at positions
    /// `p * partition_length .. (p + 1) * partition_length` of the melody. A depth
    /// of 0, or a partition length of 0 with depth 1, yields an empty string.
    ///
    /// # Errors
    ///
    /// Returns `MelodyLengthMismatch` if the melody in `mfile` does not have the
    /// length this generator was created for.
    fn gen_basename_for_file(&self, mfile: &libatm::MIDIFile) -> Result<String, PathGeneratorError> {
        // Ensure melody is expected length (compared as usize so that very long
        // sequences cannot alias the expected length through truncation)
        let observed_length = mfile.sequence.len();
        if observed_length != self.melody_length as usize {
            return Err(PathGeneratorError::PartitionPathGenerator(
                PartitionPathGeneratorError::MelodyLengthMismatch {
                    expected: self.melody_length,
                    observed: observed_length.min(u32::MAX as usize) as u32,
                }
            ));
        }

        // `new` guarantees depth * length <= melody_length, so every slice below
        // lies within the (length-checked) sequence
        let partition_length = self.partition_length as usize;
        let separator = std::path::MAIN_SEPARATOR.to_string();
        let basename = (0..self.partition_depth as usize)
            .map(|partition| {
                let start = partition * partition_length;
                mfile.sequence[start..start + partition_length]
                    .iter()
                    .map(|note| note.convert().to_string())
                    .collect::<String>()
            })
            .collect::<Vec<String>>()
            .join(&separator);
        Ok(basename)
    }
}

impl PathGenerator for PartitionPathGenerator {
    /// Build the path `<basename>/<hash>.mid` for `mfile`.
    ///
    /// `<basename>` comes from `gen_basename_for_file` (it may be empty, in which
    /// case the path is just the filename) and `<hash>` from `mfile.gen_hash()`.
    /// Errors from generating the basename are returned unchanged. Path
    /// components that are not valid Unicode are converted lossily.
    fn gen_path_for_file(&self, mfile: &libatm::MIDIFile) -> Result<String, PathGeneratorError> {
        // Start from the partition directories (could be "")
        let mut path = std::path::PathBuf::from(self.gen_basename_for_file(mfile)?);
        // Append the filename derived from the MIDI file hash
        path.push(format!("{}.mid", mfile.gen_hash()));
        Ok(path.to_string_lossy().into_owned())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /*********************************
    ***** PartitionPathGenerator *****
    *********************************/

    /// Construction is rejected when the partition depth exceeds the melody length.
    #[test]
    #[should_panic(expected = "PartitionDepthLongerThanMelody")]
    fn test_partition_depth_melody_length() {
        // Fails because partition depth must not exceed the length of
        // melodies. Each partition branch must contain at least one note,
        // so if depth > # of notes, we cannot generate enough branches
        // from the input melody.
        let _ = PartitionPathGenerator::new(3, 3, 4096, 4).unwrap();
    }

    /// Generating a path for a melody of the wrong length is an error.
    #[test]
    #[should_panic(expected = "MelodyLengthMismatch")]
    fn test_melody_length_match() {
        let path_generator = PartitionPathGenerator::new(4, 12, 4096, 2).unwrap();
        let mfile = libatm::MIDIFile::new(
            "C:4,D:5,G:7".parse::<libatm::MIDINoteVec>().unwrap(),
            libatm::MIDIFormat::Format0,
            1,
            1,
        );
        // Fails because the melody has 3 notes while the generator expects 12
        path_generator.gen_path_for_file(&mfile).unwrap();
    }

    /// Generate a test that walks every melody of `$melody_length` notes over
    /// `$note_set` and asserts that no partition (basename) receives more than
    /// `$max_files` files.
    ///
    /// The check relies on melodies of the same partition being generated
    /// consecutively.
    ///
    /// NOTE(review): this macro is not instantiated anywhere in this file, so the
    /// check currently never runs -- add invocations once suitable (small)
    /// parameter sets are agreed on.
    macro_rules! check_num_files_partition {
        ($test_name:ident, $note_set:expr, $melody_length:expr, $max_files:expr, $partition_depth:expr) => {
            #[test]
            fn $test_name() {
                let notes = $note_set.parse::<libatm::MIDINoteSet>().unwrap();
                let num_notes = notes.len() as u32;
                let max_files: u32 = $max_files;
                let mut partition = String::new();
                let mut num_files_in_partition: u32 = 0;
                let path_generator = PartitionPathGenerator::new(
                    num_notes,
                    $melody_length,
                    max_files,
                    $partition_depth,
                ).unwrap();

                for melody in crate::utils::gen_sequences(
                    &Vec::from(&notes),
                    $melody_length,
                ) {
                    // Generate partition for melody
                    let melody_partition = path_generator.gen_basename_for_file(&libatm::MIDIFile::new(
                        melody.iter().map(|n| *n.clone()).collect::<Vec<libatm::MIDINote>>(),
                        libatm::MIDIFormat::Format0,
                        1,
                        1,
                    )).unwrap();
                    // At a partition boundary, check the number of files in the
                    // partition that was just completed
                    if melody_partition != partition {
                        assert!(
                            num_files_in_partition <= max_files,
                            "{} files in partition, maximum specified was {}",
                            num_files_in_partition,
                            max_files,
                        );
                        num_files_in_partition = 0;
                        partition = melody_partition;
                    }
                    // Count the current melody towards its partition
                    num_files_in_partition += 1;
                }

                // The last partition has no following boundary; check it here
                assert!(
                    num_files_in_partition <= max_files,
                    "{} files in partition, maximum specified was {}",
                    num_files_in_partition,
                    max_files,
                );
            }
        }
    }
}