Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Working on metadata identification - ACRCloud is not suitable (payment)
  • Loading branch information
Javinator9889 committed Sep 24, 2019
1 parent fc78cc4 commit d187e1b
Show file tree
Hide file tree
Showing 8 changed files with 207 additions and 57 deletions.
1 change: 0 additions & 1 deletion YouTubeMDBot/__init__.py
Expand Up @@ -13,4 +13,3 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

19 changes: 19 additions & 0 deletions YouTubeMDBot/errors/InvalidCredentialsError.py
@@ -0,0 +1,19 @@
# YouTubeMDBot
# Copyright (C) 2019 - Javinator9889
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


class InvalidCredentialsError(Exception):
    """Error type signalling invalid credentials.

    NOTE(review): no raise site is visible in this change, so the exact
    condition that triggers it should be confirmed against the callers.
    """
20 changes: 20 additions & 0 deletions YouTubeMDBot/errors/NoMatchError.py
@@ -0,0 +1,20 @@
# YouTubeMDBot
# Copyright (C) 2019 - Javinator9889
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


class NoMatchError(Exception):
    """Raised when there is no match available."""
1 change: 1 addition & 0 deletions YouTubeMDBot/errors/__init__.py
Expand Up @@ -13,3 +13,4 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from ..errors.NoMatchError import NoMatchError
88 changes: 36 additions & 52 deletions YouTubeMDBot/metadata/MetadataIdentifier.py
Expand Up @@ -22,64 +22,48 @@
import json

from ..audio import FPCalc
from ..utils import youtube_utils
from ..constants import ACOUSTID_KEY
from ..downloader import YouTubeDownloader


class MetadataIdentifier(object):
    """Identify a piece of audio through AcoustID and fetch its cover art.

    After :meth:`identify_audio` has run, the outcome is exposed through
    plain public attributes:

    * ``result`` - the raw lookup response (``None`` until a lookup is made)
    * ``score`` - score of the result that was used
    * ``artist`` - artist names joined with ``"; "``, or ``"Unknown"``
    * ``title`` - recording title
    * ``recording_id`` / ``release_id`` - identifiers taken from the response
    * ``cover`` - front cover image fetched for ``release_id``
    """

    def __init__(self, audio: bytes, downloader: "YouTubeDownloader" = None):
        """Store the audio to identify.

        :param audio: audio data handed to ``FPCalc`` for fingerprinting.
        :param downloader: optional downloader the audio came from; only
            consulted when the lookup does not succeed.
        """
        self.audio = audio
        self.result: dict = None
        self.artist: str = ""
        self.title: str = ""
        self.release_id: str = ""
        self.recording_id: str = ""
        self.score: float = 0.0
        self.cover: bytes = bytes(0)
        self._downloader = downloader

    def identify_audio(self) -> dict:
        """Look the audio up on AcoustID and fill in the metadata attributes.

        The first result that carries recordings is used, and within it the
        first recording. Results without recordings are skipped.

        :return: the raw response of ``acoustid.lookup``.
        """
        fingerprint = FPCalc(self.audio)
        data = acoustid.lookup(apikey=ACOUSTID_KEY,
                               fingerprint=fingerprint.fingerprint(),
                               duration=fingerprint.duration(),
                               meta="recordings releaseids")
        self.result = data
        if "results" in data and data["status"] == "ok":
            for result in data["results"]:
                recordings = result.get("recordings")
                if not recordings:
                    # Keep looking: a later result may still carry
                    # recordings even when this one does not.
                    continue
                recording = recordings[0]
                self.score = result["score"]
                if recording.get("artists"):
                    names = [artist["name"] for artist in recording["artists"]]
                    self.artist = "; ".join(names)
                else:
                    self.artist = "Unknown"
                self.title = recording["title"]
                # NOTE(review): assumes the recording lists at least one
                # release - confirm against the AcoustID response format.
                self.release_id = recording["releases"][0]["id"]
                self.recording_id = recording["id"]
                self.cover = musicbrainzngs.get_image_front(self.release_id)
                break
        elif self._downloader:
            # Work in progress: the video ID is extracted but nothing uses it
            # yet. The call is kept so an invalid URL still raises here.
            video_id = youtube_utils.get_yt_video_id(  # noqa: F841
                self._downloader.get_url())

        return data
67 changes: 63 additions & 4 deletions YouTubeMDBot/tests/identifier.py
@@ -1,11 +1,19 @@
import threading
import unittest
from pprint import pprint
from time import sleep
from time import time

from YouTubeMDBot.downloader import YouTubeDownloader
from YouTubeMDBot.metadata import MetadataIdentifier


class IdentifierTest(unittest.TestCase):
lock = threading.Lock()
threads = 0
max = 0
song_info = {}

def test_identification(self):
url = "https://www.youtube.com/watch?v=YQHsXMglC9A"
downloader = YouTubeDownloader(url=url)
Expand All @@ -18,14 +26,65 @@ def test_identification(self):
print("{0} by {1} - score: {2} / 1\n"
"\thttps://musicbrainz.org/recording/{3}\n"
"\thttps://musicbrainz.org/release/{4}\n\n"
.format(identifier.get_title(), identifier.get_artist(),
identifier.get_score(),
identifier.get_recording_id(), identifier.get_release_id()))
.format(identifier.title, identifier.artist,
identifier.score,
identifier.recording_id, identifier.release_id))
with open("cover.jpg", "wb") as cover:
cover.write(identifier.get_cover())
cover.write(identifier.cover)

pprint(results)

def test_multiple_download_identification(self):
    """Download and identify four videos concurrently and print the results.

    One worker thread per URL runs ``find_metadata``. The workers are joined
    rather than polled, so a worker that dies before reaching ``barrier()``
    makes the test fail instead of hanging it forever.
    """
    urls = (
        "https://www.youtube.com/watch?v=Inm-N5rLUSI",
        "https://www.youtube.com/watch?v=-_ZwpOdXXcA",
        "https://www.youtube.com/watch?v=WOGWZD5iT10",
        "https://www.youtube.com/watch?v=GfKV9KaNJXc",
    )
    workers = [
        threading.Thread(target=self.find_metadata,
                         args=(YouTubeDownloader(url=url),))
        for url in urls
    ]
    self.max = len(workers)

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # Every worker calls barrier() once it has stored its result, so a
    # lower count means at least one of them failed on the way.
    self.assertEqual(self.getThreads(), self.max)

    pprint(self.song_info)

def barrier(self):
    """Record that one more worker has finished, under ``self.lock``."""
    with self.lock:
        self.threads += 1

def getThreads(self):
    """Return the number of finished workers, read under ``self.lock``."""
    with self.lock:
        return self.threads

def find_metadata(self, downloader: YouTubeDownloader):
    """Download one video, identify it and store the outcome.

    Runs as the thread target of the multi-download test: the identified
    metadata is saved in ``self.song_info`` under the downloader's URL and
    ``barrier()`` is called last to mark this worker as finished.

    :param downloader: downloader for the video to process.
    """
    st_dl_t = time()
    _, data = downloader.download()
    f_dl_t = time()
    print("Downloaded {} - elapsed time: {:.1f}s".format(downloader.get_url(),
                                                         f_dl_t - st_dl_t))
    identifier = MetadataIdentifier(audio=data)
    identifier.identify_audio()
    self.song_info[downloader.get_url()] = {
        "title": identifier.title,
        "artist": identifier.artist,
        "score": identifier.score,
        "record_id": "https://musicbrainz.org/recording/{0}"
        .format(identifier.recording_id),
        "release_id": "https://musicbrainz.org/release/{0}"
        .format(identifier.release_id),
        "cover": identifier.cover
    }
    self.barrier()


if __name__ == '__main__':
unittest.main()
16 changes: 16 additions & 0 deletions YouTubeMDBot/utils/__init__.py
@@ -0,0 +1,16 @@
# YouTubeMDBot
# Copyright (C) 2019 - Javinator9889
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from ..utils import youtube_utils
52 changes: 52 additions & 0 deletions YouTubeMDBot/utils/youtube_utils.py
@@ -0,0 +1,52 @@
# YouTubeMDBot
# Copyright (C) 2019 - Javinator9889
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


def get_yt_video_id(url: str) -> str:
    # initial version: http://stackoverflow.com/a/7936523/617185 \
    # by Mikhail Kashkin(http://stackoverflow.com/users/85739/mikhail-kashkin)
    """Return the video ID contained in a YouTube URL.

    Examples of URLs:
      Valid:
        'http://youtu.be/_lOT2p_FCvA',
        'www.youtube.com/watch?v=_lOT2p_FCvA&feature=feedu',
        'http://www.youtube.com/embed/_lOT2p_FCvA',
        'http://www.youtube.com/v/_lOT2p_FCvA?version=3&amp;hl=en_US',
        'https://www.youtube.com/watch?v=rTHlyTphWP0&index=6&list=PLjeDyYvG6-40qawYNR4juzvSOg-ezZ2a6',
        'youtube.com/watch?v=_lOT2p_FCvA',
      Invalid:
        'youtu.be/watch?v=_lOT2p_FCvA' (not rejected: yields 'watch'),

    :param url: the URL to inspect; a missing scheme is tolerated when the
        URL starts with 'youtu' or 'www'.
    :return: the video ID found in the URL.
    :raises ValueError: if the URL has no host, is not a YouTube URL, or no
        video ID can be found in it.
    """

    from urllib.parse import urlparse
    from urllib.parse import parse_qs

    original_url = url
    if url.startswith(('youtu', 'www')):
        # urlparse() only recognises the host when a scheme is present.
        url = 'http://' + url

    parsed = urlparse(url)
    # hostname is None for scheme-less or otherwise host-less input.
    hostname = parsed.hostname or ''
    video_id = None

    if 'youtube' in hostname:
        if parsed.path == '/watch':
            # parse_qs() drops blank values, so "?v=" counts as missing too.
            video_id = parse_qs(parsed.query).get('v', [None])[0]
        elif parsed.path.startswith(('/embed/', '/v/')):
            video_id = parsed.path.split('/')[2]
    elif 'youtu.be' in hostname:
        video_id = parsed.path[1:]

    if not video_id:
        raise ValueError(
            f"could not find a YouTube video ID in {original_url!r}")
    return video_id

0 comments on commit d187e1b

Please sign in to comment.