Commit d187e1ba authored by Javinator9889's avatar Javinator9889 🎼

Working on metadata identification - ACRCloud is not suitable (payment)

parent fc78cc4d
Pipeline #69 passed with stage
in 2 minutes and 30 seconds
......@@ -13,4 +13,3 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# YouTubeMDBot
# Copyright (C) 2019 - Javinator9889
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
class InvalidCredentialsError(Exception):
    """Exception type used to signal invalid credentials.

    Adds no state or behaviour on top of ``Exception``.
    """
# YouTubeMDBot
# Copyright (C) 2019 - Javinator9889
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
class NoMatchError(Exception):
    """Raised when there is no match available."""
......@@ -13,3 +13,4 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from ..errors.NoMatchError import NoMatchError
......@@ -22,64 +22,48 @@ except ImportError:
import json
from ..audio import FPCalc
from ..utils import youtube_utils
from ..constants import ACOUSTID_KEY
from ..downloader import YouTubeDownloader
class MetadataIdentifier(object):
    """Identify a song from raw audio through an AcoustID lookup.

    ``identify_audio`` fingerprints the audio with ``FPCalc``, sends the
    fingerprint to ``acoustid.lookup`` and copies the fields of the first
    usable match onto the instance (``title``, ``artist``, ``score``,
    ``recording_id``, ``release_id``, ``cover``). Until a match is stored,
    those attributes keep the empty defaults set in ``__init__``.
    """

    def __init__(self, audio: bytes, downloader: "YouTubeDownloader" = None):
        """Store the audio to identify and reset every result field.

        :param audio: raw audio data handed to ``FPCalc``.
        :param downloader: optional downloader the audio came from; only
            consulted when the AcoustID lookup yields no usable response.
        """
        self.audio = audio
        # Raw lookup response; stays None until identify_audio() has run.
        self.result = None
        self.artist: str = ""
        self.title: str = ""
        self.release_id: str = ""
        self.recording_id: str = ""
        self.score: float = 0.0
        self.cover: bytes = bytes(0)
        self._downloader = downloader

    def identify_audio(self) -> dict:
        """Look the audio up on AcoustID and store the first usable match.

        :return: the response of ``acoustid.lookup``, also kept in
            ``self.result``.
        """
        fingerprint = FPCalc(self.audio)
        data = acoustid.lookup(apikey=ACOUSTID_KEY,
                               fingerprint=fingerprint.fingerprint(),
                               duration=fingerprint.duration(),
                               meta="recordings releaseids")
        self.result = data
        if "results" in data and data.get("status") == "ok":
            for result in data["results"]:
                recordings = result.get("recordings")
                if not recordings:
                    # A result without recordings carries no metadata; keep
                    # looking instead of giving up on the remaining results.
                    continue
                self._store_match(result["score"], recordings[0])
                break
        elif self._downloader:
            # TODO: fallback identification based on the YouTube video is not
            # implemented yet; the video id is extracted but not used.
            video_id = youtube_utils.get_yt_video_id(  # noqa: F841
                self._downloader.get_url())
        return data

    def _store_match(self, score: float, recording: dict) -> None:
        """Copy the fields of one AcoustID recording onto the instance."""
        self.score = score
        artists = recording.get("artists")
        if artists:
            self.artist = "; ".join(artist["name"] for artist in artists)
        else:
            self.artist = "Unknown"
        # Not every recording carries a title; fall back to the empty default.
        self.title = recording.get("title") or ""
        self.recording_id = recording["id"]
        releases = recording.get("releases")
        if releases:
            self.release_id = releases[0]["id"]
            self.cover = musicbrainzngs.get_image_front(self.release_id)
        else:
            # No release means no cover can be requested; clear stale values.
            self.release_id = ""
            self.cover = bytes(0)

    # The accessors below are kept so existing callers continue to work; they
    # simply return the public attributes.

    def get_title(self) -> str:
        """Return ``self.title``."""
        return self.title

    def get_score(self) -> float:
        """Return ``self.score``."""
        return self.score

    def get_artist(self) -> str:
        """Return ``self.artist``."""
        return self.artist

    def get_recording_id(self) -> str:
        """Return ``self.recording_id``."""
        return self.recording_id

    def get_release_id(self) -> str:
        """Return ``self.release_id``."""
        return self.release_id

    def get_cover(self) -> bytes:
        """Return ``self.cover``."""
        return self.cover

    def get_results(self) -> dict:
        """Return ``self.result`` (``None`` before any lookup)."""
        return self.result
import threading
import unittest
from pprint import pprint
from time import sleep
from time import time
from YouTubeMDBot.downloader import YouTubeDownloader
from YouTubeMDBot.metadata import MetadataIdentifier
class IdentifierTest(unittest.TestCase):
lock = threading.Lock()
threads = 0
max = 0
song_info = {}
def test_identification(self):
url = "https://www.youtube.com/watch?v=YQHsXMglC9A"
downloader = YouTubeDownloader(url=url)
......@@ -18,14 +26,65 @@ class IdentifierTest(unittest.TestCase):
print("{0} by {1} - score: {2} / 1\n"
"\thttps://musicbrainz.org/recording/{3}\n"
"\thttps://musicbrainz.org/release/{4}\n\n"
.format(identifier.get_title(), identifier.get_artist(),
identifier.get_score(),
identifier.get_recording_id(), identifier.get_release_id()))
.format(identifier.title, identifier.artist,
identifier.score,
identifier.recording_id, identifier.release_id))
with open("cover.jpg", "wb") as cover:
cover.write(identifier.get_cover())
cover.write(identifier.cover)
pprint(results)
def test_multiple_download_identification(self):
    """Download and identify four videos at once, one thread per URL.

    Each thread runs ``find_metadata``; once all of them have ended the
    collected ``song_info`` is printed.
    """
    urls = (
        "https://www.youtube.com/watch?v=Inm-N5rLUSI",
        "https://www.youtube.com/watch?v=-_ZwpOdXXcA",
        "https://www.youtube.com/watch?v=WOGWZD5iT10",
        "https://www.youtube.com/watch?v=GfKV9KaNJXc",
    )
    workers = [
        threading.Thread(target=self.find_metadata,
                         args=(YouTubeDownloader(url=url),))
        for url in urls
    ]
    self.max = len(workers)
    for worker in workers:
        worker.start()
    # Wait with join() rather than polling the counter: a worker that dies
    # before reaching barrier() would otherwise keep the test spinning forever.
    for worker in workers:
        worker.join()
    # Every worker must have reported in; a mismatch means one of them failed.
    self.assertEqual(self.max, self.getThreads())
    pprint(self.song_info)
def barrier(self):
    """Increment the ``threads`` counter while holding ``lock``."""
    self.lock.acquire()
    try:
        self.threads = self.threads + 1
    finally:
        self.lock.release()
def getThreads(self):
    """Return the value of the ``threads`` counter, read while holding ``lock``."""
    self.lock.acquire()
    try:
        count = self.threads
    finally:
        self.lock.release()
    return count
def find_metadata(self, downloader: YouTubeDownloader):
    """Download one video, identify it and record what was found.

    The second element returned by ``downloader.download()`` is passed to
    ``MetadataIdentifier`` as the audio; the identified fields are stored in
    ``song_info`` under the downloader's URL.

    :param downloader: downloader bound to the video to process.
    """
    try:
        started = time()
        _, data = downloader.download()
        finished = time()
        print("Downloaded {} - elapsed time: {:.1f}s".format(
            downloader.get_url(), finished - started))
        identifier = MetadataIdentifier(audio=data)
        identifier.identify_audio()
        self.song_info[downloader.get_url()] = {
            "title": identifier.title,
            "artist": identifier.artist,
            "score": identifier.score,
            "record_id": "https://musicbrainz.org/recording/{0}"
            .format(identifier.recording_id),
            "release_id": "https://musicbrainz.org/release/{0}"
            .format(identifier.release_id),
            "cover": identifier.cover
        }
    finally:
        # Always report in, even when the download or the lookup raised, so
        # that whoever waits on the counter is never left waiting forever.
        self.barrier()
# Run the test cases of this module through unittest when the file is executed
# directly.
if __name__ == '__main__':
unittest.main()
# YouTubeMDBot
# Copyright (C) 2019 - Javinator9889
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from ..utils import youtube_utils
# YouTubeMDBot
# Copyright (C) 2019 - Javinator9889
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
def get_yt_video_id(url: str) -> str:
    """Return the video ID contained in a YouTube URL.

    A URL given without a scheme (``www.youtube.com/...``,
    ``m.youtube.com/...``, ``youtu.be/...``) is parsed as if it started with
    ``http://``.

    Examples of URLs:
      Valid:
        'http://youtu.be/_lOT2p_FCvA',
        'www.youtube.com/watch?v=_lOT2p_FCvA&feature=feedu',
        'http://www.youtube.com/embed/_lOT2p_FCvA',
        'http://www.youtube.com/v/_lOT2p_FCvA?version=3&amp;hl=en_US',
        'https://www.youtube.com/watch?v=rTHlyTphWP0&index=6&list=PLjeDyYvG6-40qawYNR4juzvSOg-ezZ2a6',
        'youtube.com/watch?v=_lOT2p_FCvA',
      Invalid (not rejected: the path segment is returned as-is):
        'youtu.be/watch?v=_lOT2p_FCvA',

    :param url: address of the video, with or without a scheme.
    :return: the video ID.
    :raises ValueError: if the host is not a YouTube one or no video ID can be
        found in the URL.
    """
    # initial version: http://stackoverflow.com/a/7936523/617185
    # by Mikhail Kashkin (http://stackoverflow.com/users/85739/mikhail-kashkin)
    from urllib.parse import parse_qs
    from urllib.parse import urlparse

    parsed = urlparse(url)
    if not parsed.netloc:
        # Without a scheme urlparse() puts the host into the path, which would
        # leave the hostname unset; parse again as a plain http URL.
        parsed = urlparse('http://' + url)
    host = parsed.hostname or ''

    video_id = ''
    if 'youtube' in host:
        if parsed.path == '/watch':
            video_id = parse_qs(parsed.query).get('v', [''])[0]
        elif parsed.path.startswith(('/embed/', '/v/')):
            video_id = parsed.path.split('/')[2]
    elif 'youtu.be' in host:
        video_id = parsed.path[1:]

    if not video_id:
        # Covers unknown hosts, unsupported paths and missing/empty IDs.
        raise ValueError(
            'cannot extract a YouTube video ID from {!r}'.format(url))
    return video_id
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment