move ascii_reduce to its own module crate

This commit is contained in:
chaos 2023-10-30 22:24:58 +00:00
parent 66a83fbbbb
commit e9cf51e291
No known key found for this signature in database
10 changed files with 166 additions and 58 deletions

69
Cargo.lock generated
View file

@ -80,6 +80,15 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "ascii_reduce"
version = "1.0.0"
dependencies = [
"phf",
"phf_codegen",
"serde_json",
]
[[package]]
name = "autocfg"
version = "1.1.0"
@ -656,6 +665,7 @@ dependencies = [
name = "musicutil"
version = "0.1.0"
dependencies = [
"ascii_reduce",
"bytes",
"clap",
"html-escape",
@ -726,6 +736,44 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]]
name = "phf"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
dependencies = [
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
dependencies = [
"phf_shared",
"rand",
]
[[package]]
name = "phf_shared"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b"
dependencies = [
"siphasher",
]
[[package]]
name = "pkg-config"
version = "0.3.27"
@ -766,6 +814,21 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]]
name = "redox_syscall"
version = "0.3.5"
@ -917,6 +980,12 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380"
[[package]]
name = "siphasher"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]]
name = "string-error"
version = "0.1.0"

View file

@ -21,7 +21,13 @@ serde_with = "3"
# argument parsing
clap = { version = "4", features = ["derive"] }
# ascii_reduce constants & transcode presets
# for reducing filenames to ascii
# useful for when storing music files on mp3 players with broken unicode support
ascii_reduce = { path = "./modules/ascii_reduce" }
# transcode presets & format handlers
lazy_static = "1"
# for scan_for_music

View file

@ -18,11 +18,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1697456312,
"narHash": "sha256-roiSnrqb5r+ehnKCauPLugoU8S36KgmWraHgRqVYndo=",
"lastModified": 1698318101,
"narHash": "sha256-gUihHt3yPD7bVqg+k/UVHgngyaJ3DMEBchbymBMvK1E=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "ca012a02bf8327be9e488546faecae5e05d7d749",
"rev": "63678e9f3d3afecfeafa0acead6239cdb447574c",
"type": "github"
},
"original": {

View file

@ -0,0 +1,14 @@
[package]
name = "ascii_reduce"
version = "1.0.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
phf = { version = "0.11", default-features = false }
[build-dependencies]
serde_json = "1.0"
phf_codegen = "0.11"

View file

@ -0,0 +1,48 @@
use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;
// Raw JSON replacement table, embedded into the build script at compile time.
// `main` parses each key as a decimal `u32` code point and uses the value as its replacement text.
const MAPPINGS_DATA: &str = include_str!("src/mappings.json");
/// Build-script entry point: converts the embedded JSON replacement table into a
/// compile-time `phf` map.
///
/// Each JSON key is a decimal Unicode code point (stored as a string) and each value is the
/// text that should replace that character. The result is written to `$OUT_DIR/codegen.rs`
/// as `static MAPPINGS: phf::Map<char, &'static str>`.
///
/// # Panics
///
/// Panics (failing the build) if the JSON is malformed, a key is not a valid `u32` or not a
/// valid Unicode scalar value, `OUT_DIR` is unset, or the output file cannot be written.
fn main() {
    // Without any `rerun-if-changed` directive Cargo re-runs the build script whenever any
    // file in the package changes; the generated table only depends on these two inputs.
    println!("cargo:rerun-if-changed=build.rs");
    println!("cargo:rerun-if-changed=src/mappings.json");

    let data: HashMap<String, String> =
        serde_json::from_str(MAPPINGS_DATA).expect("mapping data invalid");

    let mut replacement_map: HashMap<char, String> = HashMap::with_capacity(data.len());
    for (chr, repl) in data {
        let code_point = match chr.parse::<u32>() {
            Ok(n) => n,
            Err(e) => panic!(
                "mapping data broken, could not parse char {} with error {}",
                chr, e
            ),
        };
        let key = char::from_u32(code_point).expect("invalid char in string");
        replacement_map.insert(key, repl);
    }

    // `HashMap` iteration order is randomized per process. Feed the code generator in a
    // fixed (code point) order so the emitted source does not change between builds.
    let mut entries: Vec<(char, String)> = replacement_map.into_iter().collect();
    entries.sort_unstable_by_key(|(key, _)| *key);

    let mut map = phf_codegen::Map::<char>::new();
    for (key, repl) in &entries {
        // Values are emitted verbatim as Rust expressions, so wrap the replacement in
        // quotes and escape it to form a valid string literal.
        map.entry(*key, &format!("\"{}\"", repl.escape_debug()));
    }

    let out_dir = env::var("OUT_DIR").expect("OUT_DIR is set by Cargo for build scripts");
    let path = Path::new(&out_dir).join("codegen.rs");
    let mut file =
        BufWriter::new(File::create(&path).expect("could not create codegen.rs in OUT_DIR"));
    writeln!(
        file,
        "static MAPPINGS: phf::Map<char, &'static str> = {};",
        map.build()
    )
    .expect("could not write generated mappings");
    // Dropping a `BufWriter` discards flush errors; flush explicitly so a short write
    // fails the build instead of producing a truncated `codegen.rs`.
    file.flush().expect("could not flush generated mappings");
}

View file

@ -0,0 +1,21 @@
include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
/// Reduces `input` to an ASCII-only string.
///
/// ASCII characters are kept as-is. Every other character is looked up in the generated
/// `MAPPINGS` table and replaced by its mapped text; characters without an entry are
/// dropped. A string that is already pure ASCII is returned unchanged without reallocating.
pub fn reduce(input: String) -> String {
    // Fast path: nothing to replace, hand the original allocation back.
    if input.is_ascii() {
        return input;
    }

    let mut reduced = String::with_capacity(input.len());
    for ch in input.chars() {
        if ch.is_ascii() {
            reduced.push(ch);
        } else if let Some(substitute) = MAPPINGS.get(&ch) {
            reduced.push_str(substitute);
        }
        // Non-ASCII characters with no mapping are intentionally left out.
    }
    reduced
}

View file

@ -5,12 +5,13 @@ use std::thread::scope;
use crate::args::CLIArgs;
use crate::types::AudioFileInfo;
use crate::types::File;
use crate::utils::ascii_reduce::reduce_to_ascii;
use crate::utils::formats::get_format_handler;
#[cfg(feature = "replaygain")]
use crate::utils::replaygain::analyze_replaygain;
use crate::utils::scan_for_music;
use ascii_reduce::reduce;
#[derive(Debug, Clone, clap::Args)]
pub struct ProcessCommandArgs {
pub source: String,
@ -40,8 +41,8 @@ fn rename_file(process_args: &ProcessCommandArgs, file: &mut File) {
let artist = artist.replace('\n', "");
// Step 2: Reduce to ASCII
let title = reduce_to_ascii(title);
let artist = reduce_to_ascii(artist);
let title = reduce(title);
let artist = reduce(artist);
// Step 3: Remove File Separators
let title = title.replace('\\', &replace_char);

View file

@ -1,50 +0,0 @@
use lazy_static::lazy_static;
use std::collections::HashMap;
// Raw JSON replacement table, embedded into the binary at compile time and parsed lazily
// by the `MAPPINGS` static below.
const MAPPINGS_DATA: &str = include_str!("mappings.json");
lazy_static! {
    /// Replacement table keyed by character, built from `MAPPINGS_DATA` on first access.
    ///
    /// Panics on first access if the JSON is malformed, a key is not a decimal `u32`,
    /// or a key is not a valid Unicode scalar value.
    static ref MAPPINGS: HashMap<char, String> = {
        let raw: HashMap<String, String> =
            serde_json::from_str(MAPPINGS_DATA).expect("mapping data invalid");

        let mut table: HashMap<char, String> = HashMap::new();
        for (code, substitute) in &raw {
            // Keys are code points written as decimal strings.
            let code_point = match code.parse::<u32>() {
                Ok(n) => n,
                Err(e) => panic!(
                    "mapping data broken, could not parse char {} with error {}",
                    code, e
                ),
            };
            let key = char::from_u32(code_point).expect("invalid char in string");
            table.insert(key, substitute.to_string());
        }
        table
    };
}
/// Reduces `input` to an ASCII-only string.
///
/// ASCII characters pass through untouched, other characters are replaced by their entry
/// in `MAPPINGS`, and characters without an entry are dropped. Pure-ASCII input is
/// returned as-is.
pub fn reduce_to_ascii(input: String) -> String {
    // Fast path: already ASCII, so return the original string.
    if input.is_ascii() {
        return input;
    }

    input
        .chars()
        .fold(String::with_capacity(input.len()), |mut reduced, ch| {
            if ch.is_ascii() {
                reduced.push(ch);
            } else if let Some(substitute) = MAPPINGS.get(&ch) {
                reduced.push_str(substitute);
            }
            reduced
        })
}

View file

@ -1,4 +1,3 @@
pub mod ascii_reduce;
pub mod ffprobe;
pub mod format_detection;
#[cfg(feature = "replaygain")]