From 07ce5352e78343f8b9d3d35ce0ca1f2a1b1afc34 Mon Sep 17 00:00:00 2001
From: Javinator9889 <javialonso007@hotmail.es>
Date: Sun, 22 Sep 2019 17:17:32 +0200
Subject: [PATCH] Possible metadata method - needs to be tested with concurrent
 video download

---
 .gitignore                                    |  3 +
 .gitlab-ci.yml                                |  1 +
 YouTubeMDBot/audio/__init__.py                |  4 +-
 YouTubeMDBot/audio/ffmpeg.py                  | 49 +++++++++++
 YouTubeMDBot/audio/fpcalc.py                  | 50 ++++++++++++
 YouTubeMDBot/constants/__init__.py            |  4 +-
 YouTubeMDBot/constants/app_constants.py       |  6 +-
 YouTubeMDBot/downloader/youtube_downloader.py |  6 +-
 .../audio_utils.py => errors/__init__.py}     | 23 ------
 YouTubeMDBot/metadata/MetadataIdentifier.py   | 81 +++++++++++++++----
 YouTubeMDBot/metadata/__init__.py             |  1 +
 YouTubeMDBot/requirements.txt                 |  3 +-
 YouTubeMDBot/tests/identifier.py              | 31 +++++++
 13 files changed, 216 insertions(+), 46 deletions(-)
 create mode 100644 YouTubeMDBot/audio/ffmpeg.py
 create mode 100644 YouTubeMDBot/audio/fpcalc.py
 rename YouTubeMDBot/{audio/audio_utils.py => errors/__init__.py} (58%)
 create mode 100644 YouTubeMDBot/tests/identifier.py

diff --git a/.gitignore b/.gitignore
index 894a44c..7be2372 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,3 +102,6 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+
+# keys folder
+keys/
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 3ffca5e..09e336b 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -18,6 +18,7 @@ cache:
 
 before_script:
   - python -V  # Print out python version for debugging
+  - apt-get update -qq && apt-get install -y libchromaprint-tools  # CI has no TTY to confirm
 
 test:pylint:
   script:
diff --git a/YouTubeMDBot/audio/__init__.py b/YouTubeMDBot/audio/__init__.py
index 5544681..c6ce402 100644
--- a/YouTubeMDBot/audio/__init__.py
+++ b/YouTubeMDBot/audio/__init__.py
@@ -13,4 +13,6 @@
 #
 #     You should have received a copy of the GNU General Public License
 #    along with this program. If not, see <http://www.gnu.org/licenses/>.
-from ..audio.audio_utils import AudioUtils
+from ..audio.ffmpeg import FFmpegOpener
+from ..audio.ffmpeg import ffmpeg_available
+from ..audio.fpcalc import FPCalc
diff --git a/YouTubeMDBot/audio/ffmpeg.py b/YouTubeMDBot/audio/ffmpeg.py
new file mode 100644
index 0000000..ac8ee7b
--- /dev/null
+++ b/YouTubeMDBot/audio/ffmpeg.py
@@ -0,0 +1,74 @@
+#                             YouTubeMDBot
+#                  Copyright (C) 2019 - Javinator9889
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#      the Free Software Foundation, either version 3 of the License, or
+#                   (at your option) any later version.
+#
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#               GNU General Public License for more details.
+#
+#     You should have received a copy of the GNU General Public License
+#    along with this program. If not, see <http://www.gnu.org/licenses/>.
+from io import BytesIO
+from subprocess import PIPE
+from subprocess import Popen
+
+
+def ffmpeg_available() -> bool:
+    """Check whether the ``ffmpeg`` executable can be launched.
+
+    :return: True if ``ffmpeg -version`` starts and exits with status 0,
+             False otherwise.
+    """
+    try:
+        proc = Popen(["ffmpeg", "-version"],
+                     stdout=PIPE,
+                     stderr=PIPE)
+    except OSError:
+        return False
+    # communicate() drains both pipes while waiting, so the child can never
+    # block on a full pipe buffer the way it could with a bare wait().
+    proc.communicate()
+    return proc.returncode == 0
+
+
+class FFmpegOpener(object):
+    """Convert encoded audio to ffmpeg's raw ``s16le`` format.
+
+    The ``ffmpeg`` process is started when the instance is created; call
+    :meth:`open` to feed it the audio and collect its output.
+    """
+
+    def __init__(self, data: bytes):
+        """Start ``ffmpeg`` reading from stdin and writing to stdout.
+
+        :param data: encoded audio that is piped to ``ffmpeg`` by
+                     :meth:`open`.
+        """
+        self.__data = data
+        # stdin has to be a real pipe: Popen needs a file descriptor and an
+        # in-memory BytesIO cannot provide one (fileno() is unsupported).
+        self.__ffmpeg_proc = Popen(["ffmpeg", "-i", "-", "-f", "s16le", "-"],
+                                   stdout=PIPE, stderr=PIPE, stdin=PIPE)
+        self.__out = None
+        self.__err = None
+
+    def open(self) -> int:
+        """Send the audio to ``ffmpeg`` and wait for it to finish.
+
+        :return: the exit status of the ``ffmpeg`` process.
+        """
+        self.__out, self.__err = self.__ffmpeg_proc.communicate(self.__data)
+        return self.__ffmpeg_proc.returncode
+
+    def get_output(self) -> bytes:
+        """Return ffmpeg's stdout, or None if :meth:`open` was not called."""
+        return self.__out
+
+    def get_extra(self) -> bytes:
+        """Return ffmpeg's stderr, or None if :meth:`open` was not called."""
+        return self.__err
diff --git a/YouTubeMDBot/audio/fpcalc.py b/YouTubeMDBot/audio/fpcalc.py
new file mode 100644
index 0000000..d4833fc
--- /dev/null
+++ b/YouTubeMDBot/audio/fpcalc.py
@@ -0,0 +1,70 @@
+#                             YouTubeMDBot
+#                  Copyright (C) 2019 - Javinator9889
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#      the Free Software Foundation, either version 3 of the License, or
+#                   (at your option) any later version.
+#
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#               GNU General Public License for more details.
+#
+#     You should have received a copy of the GNU General Public License
+#    along with this program. If not, see <http://www.gnu.org/licenses/>.
+import re
+from subprocess import PIPE
+from subprocess import Popen
+
+from ..constants import FPCALC
+
+
+def is_fpcalc_available() -> bool:
+    """Check whether the ``fpcalc`` executable can be launched.
+
+    :return: True if ``fpcalc -v`` starts and exits with status 0,
+             False otherwise.
+    """
+    try:
+        proc = Popen(["fpcalc", "-v"], stdout=PIPE, stderr=PIPE)
+    except OSError:
+        return False
+    # Drain the pipes while waiting and report the real exit status; with no
+    # explicit return the function would yield None even when fpcalc works.
+    proc.communicate()
+    return proc.returncode == 0
+
+
+class FPCalc(object):
+    """Run ``fpcalc`` on in-memory audio and expose duration + fingerprint."""
+
+    def __init__(self, audio: bytes):
+        """Pipe *audio* to ``fpcalc`` and parse what it prints.
+
+        :param audio: encoded audio, written to the stdin of ``fpcalc``.
+        :raises RuntimeError: if the output has no ``DURATION=`` or
+                              ``FINGERPRINT=`` line.
+        """
+        fpcalc = Popen(FPCALC, stdout=PIPE, stdin=PIPE)
+        out, _ = fpcalc.communicate(audio)
+        res = out.decode("utf-8")
+
+        # Anchor on the field names so a one-digit duration still matches and
+        # the fingerprint does not keep the trailing line break.
+        duration = re.search(r"^DURATION=(\d+)", res, re.MULTILINE)
+        fingerprint = re.search(r"^FINGERPRINT=(\S+)", res, re.MULTILINE)
+        if duration is None or fingerprint is None:
+            raise RuntimeError("fpcalc returned no fingerprint - exit status: "
+                               + str(fpcalc.returncode))
+
+        self.__duration: int = int(duration.group(1))
+        self.__fp: str = fingerprint.group(1)
+
+    def duration(self) -> int:
+        """Return the audio duration reported by ``fpcalc``."""
+        return self.__duration
+
+    def fingerprint(self) -> str:
+        """Return the fingerprint string reported by ``fpcalc``."""
+        return self.__fp
diff --git a/YouTubeMDBot/constants/__init__.py b/YouTubeMDBot/constants/__init__.py
index 1402386..31760cf 100644
--- a/YouTubeMDBot/constants/__init__.py
+++ b/YouTubeMDBot/constants/__init__.py
@@ -13,4 +13,6 @@
 #
 #     You should have received a copy of the GNU General Public License
 #    along with this program. If not, see <http://www.gnu.org/licenses/>.
-from ..constants.app_constants import ydl_cli_options
+from ..constants.app_constants import ACOUSTID_KEY
+from ..constants.app_constants import FPCALC
+from ..constants.app_constants import YDL_CLI_OPTIONS
diff --git a/YouTubeMDBot/constants/app_constants.py b/YouTubeMDBot/constants/app_constants.py
index e1a7cb1..56866d6 100644
--- a/YouTubeMDBot/constants/app_constants.py
+++ b/YouTubeMDBot/constants/app_constants.py
@@ -13,5 +13,9 @@
 #
 #     You should have received a copy of the GNU General Public License
 #    along with this program. If not, see <http://www.gnu.org/licenses/>.
-ydl_cli_options = ["youtube-dl", "--format", "bestaudio[ext=m4a]", "--quiet", "--output",
+import os
+
+YDL_CLI_OPTIONS = ["youtube-dl", "--format", "bestaudio[ext=m4a]", "--quiet", "--output",
                    "-"]
+FPCALC = ["fpcalc", "-"]
+ACOUSTID_KEY = os.environ["ACOUSTID_KEY"]
diff --git a/YouTubeMDBot/downloader/youtube_downloader.py b/YouTubeMDBot/downloader/youtube_downloader.py
index 8736255..6139205 100644
--- a/YouTubeMDBot/downloader/youtube_downloader.py
+++ b/YouTubeMDBot/downloader/youtube_downloader.py
@@ -16,13 +16,13 @@
 from io import BytesIO
 from typing import Tuple
 
-from ..constants.app_constants import ydl_cli_options
+from ..constants.app_constants import YDL_CLI_OPTIONS
 
 
 class YouTubeDownloader(object):
     def __init__(self, url: str):
         self.__url: str = url
-        self.__options: list = ydl_cli_options.copy()
+        self.__options: list = YDL_CLI_OPTIONS.copy()
         self.__options.append(self.__url)
 
     def download(self) -> Tuple[BytesIO, bytes]:
@@ -37,7 +37,7 @@ def download(self) -> Tuple[BytesIO, bytes]:
             return BytesIO(stdout), stdout
         else:
             raise RuntimeError("youtube-dl downloader exception - more info: " +
-                               str(stderr))
+                               str(stderr.decode("utf-8")))
 
     def get_url(self) -> str:
         return self.__url
diff --git a/YouTubeMDBot/audio/audio_utils.py b/YouTubeMDBot/errors/__init__.py
similarity index 58%
rename from YouTubeMDBot/audio/audio_utils.py
rename to YouTubeMDBot/errors/__init__.py
index 1dc5525..8a858f1 100644
--- a/YouTubeMDBot/audio/audio_utils.py
+++ b/YouTubeMDBot/errors/__init__.py
@@ -13,26 +13,3 @@
 #
 #     You should have received a copy of the GNU General Public License
 #    along with this program. If not, see <http://www.gnu.org/licenses/>.
-from io import BytesIO
-
-import soundfile
-
-
-class AudioUtils(object):
-    def __init__(self, audio: BytesIO):
-        self.__audio = soundfile.SoundFile(audio)
-
-    def get_audio_samplerate(self) -> int:
-        return self.__audio.samplerate
-
-    def get_audio_channels(self) -> int:
-        return self.__audio.channels
-
-    def get_audio_duration(self) -> float:
-        return self.__audio.frames / self.get_audio_samplerate()
-
-    def get_audio_name(self) -> str:
-        return self.__audio.name
-
-    def get_audio_format(self) -> str:
-        return self.__audio.format
diff --git a/YouTubeMDBot/metadata/MetadataIdentifier.py b/YouTubeMDBot/metadata/MetadataIdentifier.py
index 26477f9..04768c4 100644
--- a/YouTubeMDBot/metadata/MetadataIdentifier.py
+++ b/YouTubeMDBot/metadata/MetadataIdentifier.py
@@ -13,24 +13,105 @@
 #
 #     You should have received a copy of the GNU General Public License
 #    along with this program. If not, see <http://www.gnu.org/licenses/>.
-from io import BytesIO
-
 import acoustid
+import musicbrainzngs
+
+try:
+    import ujson as json
+except ImportError:
+    import json
 
-from .. import AudioUtils
+from ..audio import FPCalc
+from ..constants import ACOUSTID_KEY
 
 
 class MetadataIdentifier(object):
-    def __init__(self, audio: BytesIO, raw: bytes):
-        self.__audio = raw
-        self.__audio_info = AudioUtils(audio)
-
-    def _calculate_fingerprint(self) -> bytes:
-        return acoustid.fingerprint(self.__audio_info.get_audio_samplerate(),
-                                    self.__audio_info.get_audio_channels(),
-                                    iter(self.__audio))
-
-    def identify_audio(self) -> list:
-        fingerprint = self._calculate_fingerprint()
-        return acoustid.lookup(None, fingerprint,
-                               self.__audio_info.get_audio_duration())
+    """Identify a song from its audio through AcoustID and MusicBrainz.
+
+    The audio is fingerprinted on construction; :meth:`identify_audio`
+    runs the lookup and fills the values returned by the getters.
+    """
+
+    def __init__(self, audio: bytes):
+        """Fingerprint *audio* and initialise every field as empty.
+
+        :param audio: encoded audio handed to :class:`FPCalc`.
+        """
+        self.__fingerprint = FPCalc(audio)
+        self.__result: json = None
+        self.__artist: str = ""
+        self.__title: str = ""
+        self.__release_id: str = ""
+        self.__recording_id: str = ""
+        self.__score: float = 0.0
+        self.__cover: bytes = bytes(0)
+
+    def identify_audio(self) -> json:
+        """Look the fingerprint up on AcoustID and store the best match.
+
+        Only the first result, its first recording and that recording's
+        first release are read. Whatever the response lacks is skipped
+        rather than raising ``IndexError``/``KeyError``.
+
+        :return: the value returned by ``acoustid.lookup``, unmodified.
+        """
+        data: json = acoustid.lookup(apikey=ACOUSTID_KEY,
+                                     fingerprint=self.__fingerprint.fingerprint(),
+                                     duration=self.__fingerprint.duration(),
+                                     meta="recordings releaseids")
+        self.__result = data
+        # A successful lookup may still carry an empty "results" list, and a
+        # result may come without "recordings"; guard before indexing.
+        if data.get("status") != "ok" or not data.get("results"):
+            return data
+        result = data["results"][0]
+        self.__score = result["score"]
+        recordings = result.get("recordings")
+        if not recordings:
+            return data
+        recording = recordings[0]
+        artists = recording.get("artists")
+        if artists:
+            self.__artist = "; ".join(artist["name"] for artist in artists)
+        else:
+            self.__artist = None
+        self.__title = recording.get("title")
+        self.__recording_id = recording.get("id")
+        releases = recording.get("releases")
+        if releases:
+            self.__release_id = releases[0]["id"]
+            try:
+                self.__cover = musicbrainzngs.get_image_front(self.__release_id)
+            except musicbrainzngs.ResponseError:
+                # The cover request was answered with an HTTP error (e.g. the
+                # release has no front cover): keep the empty cover.
+                pass
+        return data
+
+    def get_title(self) -> str:
+        """Return the title of the matched recording ("" before a lookup)."""
+        return self.__title
+
+    def get_score(self) -> float:
+        """Return the score of the first AcoustID result (0.0 by default)."""
+        return self.__score
+
+    def get_artist(self) -> str:
+        """Return the artist names joined by "; ", or None if none listed."""
+        return self.__artist
+
+    def get_recording_id(self) -> str:
+        """Return the id of the matched recording ("" before a lookup)."""
+        return self.__recording_id
+
+    def get_release_id(self) -> str:
+        """Return the id of the recording's first release ("" if unknown)."""
+        return self.__release_id
+
+    def get_cover(self) -> bytes:
+        """Return the front cover image bytes (empty when unavailable)."""
+        return self.__cover
+
+    def get_results(self) -> json:
+        """Return the last lookup response, or None before any lookup."""
+        return self.__result
diff --git a/YouTubeMDBot/metadata/__init__.py b/YouTubeMDBot/metadata/__init__.py
index 8a858f1..3799511 100644
--- a/YouTubeMDBot/metadata/__init__.py
+++ b/YouTubeMDBot/metadata/__init__.py
@@ -13,3 +13,4 @@
 #
 #     You should have received a copy of the GNU General Public License
 #    along with this program. If not, see <http://www.gnu.org/licenses/>.
+from ..metadata.MetadataIdentifier import MetadataIdentifier
diff --git a/YouTubeMDBot/requirements.txt b/YouTubeMDBot/requirements.txt
index 927b4d0..99a3959 100644
--- a/YouTubeMDBot/requirements.txt
+++ b/YouTubeMDBot/requirements.txt
@@ -1,4 +1,5 @@
-SoundFile
+musicbrainzngs
+ujson
 youtube_dl
 pyacoustid
 python-telegram-bot
diff --git a/YouTubeMDBot/tests/identifier.py b/YouTubeMDBot/tests/identifier.py
new file mode 100644
index 0000000..86b1f7d
--- /dev/null
+++ b/YouTubeMDBot/tests/identifier.py
@@ -0,0 +1,41 @@
+import unittest
+from pprint import pprint
+
+from YouTubeMDBot.downloader import YouTubeDownloader
+from YouTubeMDBot.metadata import MetadataIdentifier
+
+
+class IdentifierTest(unittest.TestCase):
+    """End-to-end check: download a video and identify its audio."""
+
+    def test_identification(self):
+        """Download a known video, identify it and print what was found.
+
+        Needs network access and the ``youtube-dl`` and ``fpcalc`` tools.
+        Writes ``hello.m4a`` and ``cover.jpg`` to the working directory so
+        the outcome can be inspected by hand.
+        """
+        url = "https://www.youtube.com/watch?v=YQHsXMglC9A"
+        downloader = YouTubeDownloader(url=url)
+        _, data = downloader.download()
+        with open("hello.m4a", "wb") as song:
+            song.write(data)
+        identifier = MetadataIdentifier(audio=data)
+
+        results = identifier.identify_audio()
+        # Without an assertion this test could only fail by raising.
+        self.assertEqual("ok", results["status"])
+        print("{0} by {1} - score: {2} / 1\n"
+              "\thttps://musicbrainz.org/recording/{3}\n"
+              "\thttps://musicbrainz.org/release/{4}\n\n"
+              .format(identifier.get_title(), identifier.get_artist(),
+                      identifier.get_score(),
+                      identifier.get_recording_id(), identifier.get_release_id()))
+        with open("cover.jpg", "wb") as cover:
+            cover.write(identifier.get_cover())
+
+        pprint(results)
+
+
+if __name__ == '__main__':
+    unittest.main()