{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"pd3f","owner":"pd3f","isFork":false,"description":"🏭 PDF text extraction pipeline: self-hosted, local-first, Docker-based ","allTopics":["python","machine-learning","pdf-to-text","language-model","extract-text","pd3f","pdf","ocr","pipeline","text-extraction","parsr"],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":3,"issueCount":15,"starsCount":280,"forksCount":35,"license":"GNU Affero General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-13T17:44:27.705Z"}},{"type":"Public","name":"pd3f-core","owner":"pd3f","isFork":false,"description":"📑 Python Package to reconstruct the original continuous text from PDFs with language models","allTopics":["pdf","machine-learning","text-extraction","language-model","dehyphenation","pd3f"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":23,"issueCount":2,"starsCount":34,"forksCount":8,"license":"GNU Affero General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-09-08T19:47:01.972Z"}},{"type":"Public","name":"pd3f.com","owner":"pd3f","isFork":false,"description":"📝 Website to advertise & document pd3f","allTopics":["hugo-academic","pd3f"],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":1,"issueCount":0,"starsCount":1,"forksCount":2,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-01-22T22:56:38.907Z"}},{"type":"Public","name":"pd3f-dataset-bmjv","owner":"pd3f","isFork":false,"description":"Dataset of (mostly German) PDFs used to develop pd3f","allTopics":["pdf","german","pd3f"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":5,"issueCount":0,"starsCount":1,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-12-08T04:29:31.006Z"}},{"type":"Public","name":"dehyphen","owner":"pd3f","isFork":false,"description":"📜 Dehyphenation of broken text (mainly German), i.e., extracted from a PDF","allTopics":["python","nlp","pdf","hyphens","hyphen","flair-embeddings","dehyphenation","pd3f","german","flair"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":4,"starsCount":37,"forksCount":4,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-03-08T12:43:02.730Z"}},{"type":"Public","name":"pd3-flair","owner":"pd3f","isFork":true,"description":"Flair's language models without unnecessary dependencies","allTopics":["pd3f"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":2070,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-09-15T13:38:24.902Z"}},{"type":"Public","name":"pd3f-results","owner":"pd3f","isFork":false,"description":"Results with pd3f on some PDF datasets","allTopics":["pd3f"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":1,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-08-21T09:02:29.973Z"}}],"repositoryCount":7,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}