From d418b1d60d478b4c341251a5910f19328252308e Mon Sep 17 00:00:00 2001 From: it-is-wednesday Date: Sat, 9 Nov 2019 07:54:26 +0200 Subject: [PATCH] mp3 tagging and album art download --- dune | 2 +- opam | 4 ++-- src/album.ml | 7 +++++++ src/cli.ml | 8 ++++++-- src/desc.ml | 43 ++++++++++++++++++++++++----------------- src/dune | 2 +- src/main.ml | 54 +++++++++++++++++++++++++++++++++++++++++++++------- src/track.ml | 29 +++++++++++++++++++--------- src/util.ml | 6 ++++++ 9 files changed, 116 insertions(+), 39 deletions(-) create mode 100644 src/album.ml diff --git a/dune b/dune index a7003bb..fb9b869 100644 --- a/dune +++ b/dune @@ -1,3 +1,3 @@ (env (dev - (flags (:standard -w @A-48-44-45)))) + (flags (:standard -w @A-48-44-45-40-42)))) diff --git a/opam b/opam index 472cc90..93f1594 100644 --- a/opam +++ b/opam @@ -9,6 +9,6 @@ maintainer: "Maor Kadosh" license: "GPLv3" homepage: "https://git.avocadosh.xyz/a/pitzulit" bug-reports: "https://git.avocadosh.xyz/a/pitzulit/issues" -dev-repo: "https://git.avocadosh.xyz/a/pitzulit" -depends: ["ocaml" "cmdliner" "containers" "cohttp-lwt" "yojson" "re"] +dev-repo: "git+https://git.avocadosh.xyz/a/pitzulit.git" +depends: ["ocaml" "cmdliner" "containers" "cohttp-lwt-unix" "yojson" "re"] build: ["dune" "build" "-p" name "-j" jobs] diff --git a/src/album.ml b/src/album.ml new file mode 100644 index 0000000..edea7ec --- /dev/null +++ b/src/album.ml @@ -0,0 +1,7 @@ +open Containers + +type t = { + title: string; + artist: string; + cover_art: IO.File.t; +} diff --git a/src/cli.ml b/src/cli.ml index bf9f740..5d8ce03 100644 --- a/src/cli.ml +++ b/src/cli.ml @@ -2,7 +2,11 @@ open Cmdliner let no_download = let doc = "Don't run youtube-dl at all; only parse existing audio and JSON files that were already download in previous runs" in - Arg.(value & flag & info ["n"; "no-download"] ~doc) + Arg.(value & flag & info ["no-download"] ~doc) + +let no_extract = + let doc = "Don't extract tracks out of the album file, just tag them" in + Arg.(value & flag & info ["no-extract"] ~doc) let url = let doc = "URL of YouTube video (or any other host supported by youtube-dl)" in @@ -16,5 +20,5 @@ let run main_func = let open Cmdliner in let doc = "sample text" in Term.(exit @@ eval - (Term.(const main_func $ url $ no_download $ dir), + (Term.(const main_func $ url $ dir $ no_download $ no_extract), Term.info "pitzulit" ~version:"v0.1" ~doc)) diff --git a/src/desc.ml b/src/desc.ml index bca8d32..95e2b06 100644 --- a/src/desc.ml +++ b/src/desc.ml @@ -1,9 +1,13 @@ open Containers open Option.Infix -let timestamp_pattern = Re.Perl.compile_pat "(?:\\d+:)?\\d+:\\d+" -let list_item_mark_pattern = Re.Perl.compile_pat "\\d+\\." -let other_noise_pattern = Re.Perl.compile_pat "-|–|-|-" +(* track patterns *) +let timestamp_pat = Re.Perl.compile_pat "(?:\\d+:)?\\d+:\\d+" +let list_item_mark_pat = Re.Perl.compile_pat "\\d+\\." +let other_noise_pat = Re.Perl.compile_pat "-|–|-|-" + +(* video title patterns *) +let album_title_noise_pat = Re.Perl.compile_pat "(\\[|\\()full album(\\]|\\))" ~opts:[`Caseless] type stamp_line = { title: string; @@ -28,7 +32,7 @@ let parse_line (raw_line: string) : stamp_line option = let extract_timestamp line : int option = try - Re.exec timestamp_pattern line + Re.exec timestamp_pat line |> (fun groups -> Re.Group.get groups 0) |> parse_timestamp_string with @@ -37,9 +41,9 @@ let parse_line (raw_line: string) : stamp_line option = let extract_title line = line - |> Re.replace_string ~all:false ~by:"" timestamp_pattern - |> Re.replace_string ~all:false ~by:"" list_item_mark_pattern - |> Re.replace_string ~all:false ~by:"" other_noise_pattern + |> Re.replace_string ~all:false ~by:"" timestamp_pat + |> Re.replace_string ~all:false ~by:"" list_item_mark_pat + |> Re.replace_string ~all:false ~by:"" other_noise_pat |> String.trim in @@ -55,28 +59,33 @@ let parse_tracks_from_desc (desc: string): Track.t list = timestamp. for example: 2:30 bruh song 3:22 second bruh song *) - let stamp_lines = List.filter_map parse_line (String.split ~by:"\\n" desc) in + let stamp_lines = List.filter_map parse_line (String.split ~by:"\n" desc) in (* figure out track's actual time ranges out of the timestamps. we take into account the surrounding lines to calculate it. for example, given the previous example, we can understand that "bruh song" starts at 2:30 and ends at 3:22, because the timestamp in the following line is 3:22. *) let num_of_lines = List.length stamp_lines in - stamp_lines |> List.mapi (fun line_num {title; timestamp_sec} -> - let time = match line_num with + stamp_lines |> List.mapi (fun track_num {title; timestamp_sec} -> + let time = match track_num with (* last track *) - | x when x = (num_of_lines - 1) -> Track.End timestamp_sec + | x when x = (num_of_lines - 1) -> Track.Time.End timestamp_sec (* either the first track or a track in the middle *) | _ -> (* timestamp at next line *) - let next_stamp = (List.get_at_idx_exn (line_num + 1) stamp_lines).timestamp_sec in - match line_num with - | 0 -> Track.Beginning next_stamp - | _ -> Track.Middle (timestamp_sec, next_stamp) + let next_stamp = (List.get_at_idx_exn (track_num + 1) stamp_lines).timestamp_sec in + match track_num with + | 0 -> Track.Time.Beginning next_stamp + | _ -> Track.Time.Middle (timestamp_sec, next_stamp) in - Track.{title; time}) + Track.{title; time; track_num}) let extract_title_data video_title = let s = String.split_on_char '-' video_title in - List.nth s 0, List.nth s 1 + List.nth s 0 + |> Re.replace_string album_title_noise_pat ~by:"" + |> String.trim, + List.nth s 1 + |> Re.replace_string album_title_noise_pat ~by:"" + |> String.trim diff --git a/src/dune b/src/dune index eb06821..653e195 100644 --- a/src/dune +++ b/src/dune @@ -3,6 +3,6 @@ (libraries cmdliner containers - cohttp-lwt + cohttp-lwt-unix yojson re)) diff --git a/src/main.ml b/src/main.ml index aed5ee1..2bf6c3a 100644 --- a/src/main.ml +++ b/src/main.ml @@ -13,9 +13,30 @@ let download url = | 0 -> () | error_code -> Printf.eprintf "youtube-dl failed with error code %d\n" error_code; exit 1 -let main url no_download dir = + +let parse_info_json file_name = + let open Yojson.Basic in + let json = from_file file_name in + Util.to_string (Util.member "title" json), + Util.to_string (Util.member "description" json), + Util.to_string (Util.member "thumbnail" json) |> Uri.of_string + + +let tag file (track: Track.t) (album: Album.t) = + Printf.sprintf "eyeD3 '%s' --title '%s' --artist '%s' --album '%s' --track %d --add-image %s:FRONT_COVER" + file track.title album.artist album.title track.track_num album.cover_art + |> Sys.command + + +let main url dir no_download no_extract = + if not (IO.File.exists dir) then begin + Printf.printf "Directory %s doesn't exist, creating it" dir; + Unix.mkdir dir 0o777; + end; + Printf.printf "Working in %s" (if String.equal dir "." then "current directory" else dir); Sys.chdir dir; + print_endline "Looking for required binaries"; (* make sure the required executables are available via PATH *) let required_bins = ["youtube-dl"; "eyeD3"; "ffmpeg"] in if not (List.for_all Util.does_exec_exists required_bins) then begin @@ -23,13 +44,32 @@ let main url no_download dir = exit 1 end; - if not no_download then download url; + if no_download then + print_endline "Skipping video download" + else + download url; + + print_endline "Parsing .info.json"; + let video_title, desc, cover_uri = parse_info_json "album.mp3.info.json" in + + print_endline "Downloading cover art (video thumbnail)"; + Util.wget cover_uri "cover.jpg" |> Lwt_main.run; + + let album_artist, album_title = Desc.extract_title_data video_title in + (* Printf.printf "Album details found: \"%s\" by %s\n" album_title album_artist; *) + + let album = Album.{ + title = album_title; + artist = album_artist; + cover_art = IO.File.make "cover.jpg" } in - Yojson.Basic.from_file "album.mp3.info.json" - |> Yojson.Basic.Util.member "description" - |> Yojson.Basic.to_string + desc |> Desc.parse_tracks_from_desc - |> List.iter (fun track -> - Track.extract "album.mp3" track |> ignore) + |> List.iter (fun (track : Track.t) -> + let track_file = track.title ^ ".mp3" in + if not no_extract then + Track.extract "album.mp3" track; + tag track_file track album |> ignore; + ) let () = Cli.run main diff --git a/src/track.ml b/src/track.ml index 037d1ca..c4a416a 100644 --- a/src/track.ml +++ b/src/track.ml @@ -1,15 +1,22 @@ -type time = - | Beginning of int (* track's end timestamp in seconds *) - | Middle of int * int (* track's beginning timestamp and end timestamp in seconds *) - | End of int (* track's timestamp (from the beginning!) in seconds *) +open Containers + +module Time = struct + type t = + | Beginning of int (* track's end timestamp in seconds *) + | Middle of int * int (* track's beginning timestamp and end timestamp in seconds *) + | End of int (* track's timestamp (from the beginning!) in seconds *) +end type t = { title: string; - time: time + time: Time.t; + track_num: int; } + let to_string track = let beg, end_ = + let open Time in match track.time with | Beginning x -> 0, x | Middle (x, y) -> x, y @@ -17,15 +24,19 @@ let to_string track = in Printf.sprintf "%s (%d - %d)" track.title beg end_ -let extract album_file {title; time} = - let range = match time with + +let extract album_file {title; time; _} = + let range = + let open Time in + match time with | Beginning end_ -> Printf.sprintf "-t %d" end_ | Middle (beg, end_) -> Printf.sprintf "-ss %d -to %d" beg end_ | End beg -> Printf.sprintf "-ss %d" beg in + let title = String.escaped title in Sys.command (Printf.sprintf - "ffmpeg -loglevel fatal -hide_banner -y %s -i '%s' '%s.mp3'" + "ffmpeg -loglevel info -hide_banner -y %s -i '%s' '%s.mp3'" range (String.escaped album_file) - (String.escaped title)) + title) |> ignore; diff --git a/src/util.ml b/src/util.ml index bebbb4c..4930d63 100644 --- a/src/util.ml +++ b/src/util.ml @@ -13,3 +13,9 @@ let eprint msg = let does_exec_exists name = Sys.command (Printf.sprintf "command -v %s 1> /dev/null" name) = 0 + +let wget uri out_path = + let open Lwt.Infix in + Cohttp_lwt_unix.Client.get uri >>= fun (_resp, body) -> + Cohttp_lwt.Body.to_string body >|= fun body -> + IO.File.write_exn out_path body