You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
145 lines
4.0 KiB
145 lines
4.0 KiB
use std::collections::BTreeMap;
|
|
use namegen::{SampleSet, Sample};
|
|
use crate::manifest::SourceManifest;
|
|
|
|
#[derive(Default)]
|
|
pub struct SourceCollection {
|
|
sources: BTreeMap<String, Source>
|
|
}
|
|
|
|
impl SourceCollection {
|
|
pub fn sources(&self) -> impl Iterator<Item = (&str, &Source)> {
|
|
self.sources.iter().map(|(k, v)| (k.as_str(), v))
|
|
}
|
|
|
|
pub fn source(&self, name: &str) -> Option<&Source> {
|
|
self.sources.get(name)
|
|
}
|
|
|
|
pub fn source_mut(&mut self, name: &str) -> &mut Source {
|
|
if !self.sources.contains_key(name) {
|
|
self.sources.insert(name.to_owned(), Source::default());
|
|
}
|
|
|
|
self.sources.get_mut(name).unwrap()
|
|
}
|
|
|
|
pub fn load_full_names(&mut self, data: &str, source: &SourceManifest) -> Result<(), &'static str> {
|
|
for line in data.lines() {
|
|
if line.len() < 1 {
|
|
continue
|
|
}
|
|
|
|
if let Some(prefix) = &source.comment_prefix {
|
|
if line.starts_with(prefix) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
for (i, token) in line.split(' ').enumerate() {
|
|
if let Some(empty_token) = &source.empty_token {
|
|
if token == empty_token.as_str() {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
let mut target = self.source_mut(&source.targets[i]);
|
|
|
|
let mut token = if source.to_lower {
|
|
token.to_owned().to_lowercase()
|
|
} else {
|
|
token.to_owned()
|
|
};
|
|
|
|
target.set_mut("list").add_sample(Sample::Word(token))
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub fn load_labeled_groups(&mut self, data: &str, source: &SourceManifest) -> Result<(), &'static str> {
|
|
let mut target_index = 0usize;
|
|
let mut new_group = true;
|
|
let mut current_key = String::new();
|
|
|
|
for line in data.lines() {
|
|
if line.len() < 1 {
|
|
new_group = true;
|
|
continue;
|
|
}
|
|
|
|
if let Some(separator) = &source.separator {
|
|
if line.starts_with(separator) {
|
|
target_index += 1;
|
|
new_group;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if let Some(prefix) = &source.comment_prefix {
|
|
if line.starts_with(prefix) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
let mut target = self.source_mut(&source.targets[target_index]);
|
|
|
|
if new_group {
|
|
current_key = line.to_owned();
|
|
let labels: Vec<String> = line.split(' ').map(|s| s.to_owned()).collect();
|
|
|
|
target.set_labeled_mut(line, labels.as_slice());
|
|
new_group = false;
|
|
|
|
continue;
|
|
}
|
|
|
|
let line = if source.to_lower {
|
|
line.to_owned().to_lowercase()
|
|
} else {
|
|
line.to_owned()
|
|
};
|
|
|
|
let tokens: Vec<String> = line.split(' ').map(|s| s.to_owned()).collect();
|
|
let set = target.set_mut(current_key.as_str());
|
|
|
|
if tokens.len() != set.labels().len() {
|
|
return Err("Token length != label count");
|
|
}
|
|
|
|
set.add_sample(
|
|
Sample::Tokens(tokens)
|
|
);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[derive(Default)]
|
|
pub struct Source {
|
|
sets: BTreeMap<String, SampleSet>
|
|
}
|
|
|
|
impl Source {
|
|
pub fn sets(&self) -> impl Iterator<Item = &SampleSet> {
|
|
self.sets.values()
|
|
}
|
|
|
|
pub fn set_mut(&mut self, name: &str) -> &mut SampleSet {
|
|
if !self.sets.contains_key(name) {
|
|
self.sets.insert(name.to_owned(), SampleSet::new());
|
|
}
|
|
|
|
self.sets.get_mut(name).unwrap()
|
|
}
|
|
|
|
pub fn set_labeled_mut<S: AsRef<str>>(&mut self, name: &str, labels: &[S]) -> &mut SampleSet {
|
|
if !self.sets.contains_key(name) {
|
|
self.sets.insert(name.to_owned(), SampleSet::with_labels(labels));
|
|
}
|
|
|
|
self.sets.get_mut(name).unwrap()
|
|
}
|
|
}
|