{"payload":{"pageCount":3,"repositories":[{"type":"Public","name":"sklearn-crfsuite","owner":"TeamHG-Memex","isFork":false,"description":"scikit-learn inspired API for CRFsuite","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":12,"issueCount":34,"starsCount":425,"forksCount":214,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-09-25T19:44:09.103Z"}},{"type":"Public","name":"agnostic","owner":"TeamHG-Memex","isFork":false,"description":"Agnostic Database Migrations","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":8,"starsCount":52,"forksCount":18,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-10T02:44:38.933Z"}},{"type":"Public","name":"soft404","owner":"TeamHG-Memex","isFork":false,"description":"A classifier for detecting soft 404 pages","allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":5,"issueCount":3,"starsCount":56,"forksCount":14,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-07-06T21:10:26.020Z"}},{"type":"Public","name":"Formasaurus","owner":"TeamHG-Memex","isFork":false,"description":"Formasaurus tells you the type of an HTML form and its fields using machine learning","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":2,"issueCount":12,"starsCount":115,"forksCount":47,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-05-23T22:11:25.981Z"}},{"type":"Public","name":"tensorboard_logger","owner":"TeamHG-Memex","isFork":false,"description":"Log TensorBoard events without touching TensorFlow","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":4,"issueCount":9,"starsCount":631,"forksCount":54,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-12-26T20:24:35.609Z"}},{"type":"Public","name":"arachnado","owner":"TeamHG-Memex","isFork":false,"description":"Web Crawling UI and HTTP API, based on Scrapy and Tornado","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":5,"issueCount":16,"starsCount":159,"forksCount":65,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-11-04T19:11:13.623Z"}},{"type":"Public","name":"scrapy-crawl-once","owner":"TeamHG-Memex","isFork":false,"description":"Scrapy middleware which allows to crawl only new content","allTopics":["scrapy"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":3,"issueCount":3,"starsCount":77,"forksCount":23,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-10-31T19:28:53.430Z"}},{"type":"Public","name":"autologin","owner":"TeamHG-Memex","isFork":false,"description":"A project to attempt to automatically login to a website given a single seed","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":5,"issueCount":9,"starsCount":122,"forksCount":43,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-07-29T22:28:06.686Z"}},{"type":"Public","name":"scrapy-rotating-proxies","owner":"TeamHG-Memex","isFork":false,"description":"use multiple proxies with Scrapy","allTopics":["proxy","scrapy"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":8,"issueCount":44,"starsCount":721,"forksCount":157,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-05-20T01:40:55.157Z"}},{"type":"Public","name":"eli5","owner":"TeamHG-Memex","isFork":false,"description":"A library for debugging/inspecting machine learning classifiers and explaining their predictions","allTopics":["python","nlp","data-science","machine-learning","scikit-learn","xgboost","lightgbm","inspection","explanation","crfsuite"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":19,"issueCount":144,"starsCount":2736,"forksCount":333,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-05-01T15:53:37.199Z"}},{"type":"Public","name":"url-summary","owner":"TeamHG-Memex","isFork":false,"description":"Show summary of a large number of URLs in a Jupyter Notebook","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":0,"starsCount":17,"forksCount":9,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-06-08T19:11:38.614Z"}},{"type":"Public","name":"sitehound-frontend","owner":"TeamHG-Memex","isFork":false,"description":"Site Hound (previously THH) is a Domain Discovery Tool","allTopics":["topics","domain-discovery"],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":4,"issueCount":2,"starsCount":23,"forksCount":13,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-06-01T21:51:31.153Z"}},{"type":"Public","name":"autopager","owner":"TeamHG-Memex","isFork":false,"description":"Detect and classify pagination links","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":6,"starsCount":97,"forksCount":25,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-09-09T10:22:00.542Z"}},{"type":"Public","name":"html-text","owner":"TeamHG-Memex","isFork":false,"description":"Extract text from HTML","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":2,"issueCount":12,"starsCount":126,"forksCount":24,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-07-22T19:13:34.550Z"}},{"type":"Public","name":"docker-tor-rotator","owner":"TeamHG-Memex","isFork":true,"description":"A rotating socks proxy using Tor, Delegate and Haproxy","allTopics":[],"primaryLanguage":{"name":"Dockerfile","color":"#384d54"},"pullRequestCount":0,"issueCount":1,"starsCount":14,"forksCount":16,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2019-12-19T14:23:33.765Z"}},{"type":"Public","name":"aquarium","owner":"TeamHG-Memex","isFork":false,"description":"Splash + HAProxy + Docker Compose","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":24,"starsCount":194,"forksCount":43,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-11-29T18:23:23.522Z"}},{"type":"Public","name":"json-lines","owner":"TeamHG-Memex","isFork":false,"description":"Read JSON lines (jl) files, including gzipped and broken","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":34,"forksCount":9,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-11-21T12:33:41.459Z"}},{"type":"Public","name":"scrapy-kafka-export","owner":"TeamHG-Memex","isFork":false,"description":"Scrapy extension which writes crawled items to Kafka","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":29,"forksCount":9,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-11-08T13:25:28.495Z"}},{"type":"Public","name":"scrapy-cdr","owner":"TeamHG-Memex","isFork":false,"description":"Item definition and utils for storing items in CDR format for scrapy","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":7,"forksCount":6,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-10-29T08:10:40.317Z"}},{"type":"Public","name":"sitehound-backend","owner":"TeamHG-Memex","isFork":false,"description":"Sitehound's backend","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":6,"forksCount":5,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-10-17T21:31:25.168Z"}},{"type":"Public","name":"autologin-middleware","owner":"TeamHG-Memex","isFork":false,"description":"Scrapy middleware for the autologin","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":4,"starsCount":38,"forksCount":15,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-05-29T21:31:28.571Z"}},{"type":"Public","name":"undercrawler","owner":"TeamHG-Memex","isFork":false,"description":"A generic crawler","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":17,"starsCount":78,"forksCount":25,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-05-29T21:31:23.112Z"}},{"type":"Public","name":"domain-discovery-crawler","owner":"TeamHG-Memex","isFork":false,"description":"Broad crawler for domain discovery","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":19,"forksCount":10,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-05-29T21:31:18.118Z"}},{"type":"Public","name":"page-compare","owner":"TeamHG-Memex","isFork":false,"description":"Simple heuristic for measuring web page similarity (& data set)","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":1,"starsCount":90,"forksCount":18,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-05-29T21:31:12.461Z"}},{"type":"Public","name":"hh-page-classifier","owner":"TeamHG-Memex","isFork":false,"description":"Headless Horseman Page Classifier service","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":7,"forksCount":6,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-05-29T21:31:04.023Z"}},{"type":"Public","name":"deep-deep","owner":"TeamHG-Memex","isFork":false,"description":"Adaptive crawler which uses Reinforcement Learning methods","allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":0,"starsCount":168,"forksCount":38,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-05-29T21:30:40.698Z"}},{"type":"Public","name":"scrash-lua-examples","owner":"TeamHG-Memex","isFork":false,"description":"A collection of example LUA scripts and JS utilities","allTopics":[],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":0,"issueCount":0,"starsCount":7,"forksCount":4,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-05-29T21:30:34.318Z"}},{"type":"Public","name":"MaybeDont","owner":"TeamHG-Memex","isFork":false,"description":"A component that tries to avoid downloading duplicate content","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":27,"forksCount":14,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-05-29T21:29:51.480Z"}},{"type":"Public","name":"sitehound","owner":"TeamHG-Memex","isFork":false,"description":"This is the facade for installation and access to the individual components","allTopics":[],"primaryLanguage":{"name":"Shell","color":"#89e051"},"pullRequestCount":0,"issueCount":0,"starsCount":16,"forksCount":8,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-05-29T21:29:39.837Z"}},{"type":"Public","name":"sshadduser","owner":"TeamHG-Memex","isFork":false,"description":"A simple tool to add a new user with OpenSSH keys.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-05-29T21:29:33.657Z"}}],"repositoryCount":74,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}