Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Migrate to Flight SQL #440

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:

- name: Cache Homebrew Bundler RubyGems
id: cache
uses: actions/cache@v4.0.2
uses: actions/cache@v4
with:
path: ${{ steps.set-up-homebrew.outputs.gems-path }}
key: ${{ runner.os }}-rubygems-${{ steps.set-up-homebrew.outputs.gems-hash }}
Expand Down
3 changes: 2 additions & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

source "https://rubygems.org"

gem "influxdb-client"
gem "rake" # missing dependency for red-arrow-flight
gem "red-arrow-flight-sql"
37 changes: 35 additions & 2 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,10 +1,43 @@
GEM
remote: https://rubygems.org/
specs:
influxdb-client (3.1.0)
bigdecimal (3.1.7)
csv (3.3.0)
extpp (0.1.1)
fiddle (1.1.2)
gio2 (4.2.2)
fiddle
gobject-introspection (= 4.2.2)
glib2 (4.2.2)
native-package-installer (>= 1.0.3)
pkg-config (>= 1.3.5)
gobject-introspection (4.2.2)
glib2 (= 4.2.2)
native-package-installer (1.1.9)
pkg-config (1.5.6)
rake (13.2.1)
red-arrow (16.0.0)
bigdecimal (>= 3.1.0)
csv
extpp (>= 0.1.1)
gio2 (>= 3.5.0)
native-package-installer
pkg-config
red-arrow-flight (16.0.0)
red-arrow (= 16.0.0)
red-arrow-flight-sql (16.0.0)
red-arrow-flight (= 16.0.0)

PLATFORMS
aarch64-linux
arm64-darwin
ruby
x86_64-darwin
x86_64-linux

DEPENDENCIES
influxdb-client
rake
red-arrow-flight-sql

BUNDLED WITH
2.4.18
193 changes: 86 additions & 107 deletions cmd/formula-analytics.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,14 @@ def formula_analytics

Homebrew.install_bundler!
REPO_ROOT.cd do
if !BUNDLER_SETUP.exist? || !quiet_system("bundle", "check", "--path", "vendor/ruby")
safe_system "bundle", "install", "--standalone", "--path", "vendor/ruby", out: :err
ENV["BUNDLE_PATH"] = "vendor/ruby"
if !BUNDLER_SETUP.exist? || !quiet_system("bundle", "check")
Process.wait(fork do
# Native build scripts fail if EUID != UID
Process::UID.change_privilege(Process.euid) if Process.euid != Process.uid
exec "bundle", "install", "--standalone", out: :err
end)
raise "Failed to install gems" unless $CHILD_STATUS.success?
end
end

Expand All @@ -85,6 +91,7 @@ def formula_analytics
def influx_analytics(args)
require "utils/analytics"
require "json"
require "arrow-flight-sql"

token = if args.setup?
Utils::Analytics::INFLUX_TOKEN
Expand Down Expand Up @@ -127,9 +134,6 @@ def influx_analytics(args)

category_matching_buckets = [:build_error, :cask_install, :command_run, :test_bot_test]

# TODO: we don't seem to get a valid count for these categories, unclear why.
count_being_weird_categories = [:command_run_options, :test_bot_test]

categories.each do |category|
additional_where = all_core_formulae_json ? " AND tap_name =~ /homebrew\\/(core|cask)/" : ""
bucket = if category_matching_buckets.include?(category)
Expand Down Expand Up @@ -176,16 +180,17 @@ def influx_analytics(args)
groups = [:package, :tap_name, :options]
end

call_options = ArrowFlight::CallOptions.new
call_options.add_header("authorization", "Bearer #{token}")
call_options.add_header("database", Utils::Analytics::INFLUX_BUCKET)
client = ArrowFlight::Client.new("grpc+tls://eu-central-1-1.aws.cloud2.influxdata.com:443")
MikeMcQuaid marked this conversation as resolved.
Show resolved Hide resolved
sql_client = ArrowFlightSQL::Client.new(client)

query = <<~EOS
SELECT COUNT(*) AS "count" FROM "#{bucket}" WHERE time >= now() - #{days_ago}d#{additional_where} GROUP BY #{groups.map { |e| "\"#{e}\"" }.join(",")}
SELECT #{groups.map { |e| "\"#{e}\"" }.join(",")}, COUNT(*) AS "count" FROM "#{bucket}" WHERE time >= now() - interval '#{days_ago} days'#{additional_where} GROUP BY #{groups.map { |e| "\"#{e}\"" }.join(",")}
MikeMcQuaid marked this conversation as resolved.
Show resolved Hide resolved
EOS
api_result_text = Utils.safe_popen_read(Utils::Curl.curl_executable, "--fail", "--silent",
"--get", "#{Utils::Analytics::INFLUX_HOST}/query",
"--header", "Authorization: Token #{token}",
"--header", "Accept: application/json",
"--data-urlencode", "db=#{Utils::Analytics::INFLUX_BUCKET}",
"--data-urlencode", "q=#{query}")
api_result = JSON.parse(api_result_text)
endpoints = sql_client.execute(query, call_options).endpoints
odie "No endpoints found" if endpoints.empty?

json = {
category:,
Expand All @@ -196,108 +201,82 @@ def influx_analytics(args)
items: [],
}

odie "No data returned" unless api_result["results"].first.key? "series"

api_result["results"].first["series"].each do |result|
next unless result.key? "tags"

tags = result["tags"]
dimension = case category
when :homebrew_devcmdrun_developer
"devcmdrun=#{tags["devcmdrun"]} HOMEBREW_DEVELOPER=#{tags["developer"]}"
when :homebrew_os_arch_ci
if tags["ci"] == "true"
"#{tags["os"]} #{tags["arch"]} (CI)"
else
"#{tags["os"]} #{tags["arch"]}"
end
when :homebrew_prefixes
if tags["prefix"] == "custom-prefix"
"#{tags["prefix"]} (#{tags["os"]} #{tags["arch"]})"
else
(tags["prefix"]).to_s
end
when :os_versions
format_os_version_dimension(tags["os_name_and_version"])
when :command_run_options
"#{tags["command"]} #{tags["options"]}"
when :test_bot_test
command_and_package, options = tags["command"].split.partition { |arg| !arg.start_with?("-") }

# Cleanup bad data before https://github.com/Homebrew/homebrew-test-bot/pull/1043
# TODO: actually delete this from InfluxDB.
# Can delete this code after 27th April 2025.
next if %w[audit install linkage style test].exclude?(command_and_package.first)
next if command_and_package.last.include?("/")
next if options.include?("--tap=")
next if options.include?("--only-dependencies")
next if options.include?("--cached")

command_and_options = (command_and_package + options.sort).join(" ")
passed = (tags["passed"] == "true") ? "PASSED" : "FAILED"

"#{command_and_options} (#{tags["os"]} #{tags["arch"]}) (#{passed})"
else
tags[groups.first.to_s]
end
next if dimension.blank?

if (tap_name = tags["tap_name"].presence) &&
((tap_name != "homebrew/cask" && dimension_key == :cask) ||
(tap_name != "homebrew/core" && dimension_key == :formula))
dimension = "#{tap_name}/#{dimension}"
end

if (all_core_formulae_json || category == :build_error) &&
(options = tags["options"].presence)
# homebrew/core formulae don't have non-HEAD options but they ended up in our analytics anyway.
if all_core_formulae_json
options = options.split.include?("--HEAD") ? "--HEAD" : ""
end
dimension = "#{dimension} #{options}"
end

dimension = dimension.strip
next if dimension.match?(/[<>]/)

# we want any valid count that isn't the time field
count = nil
result["values"].first.compact.drop(1).find do |possible_count|
break if count.present?

count ||= begin
if possible_count.is_a?(Integer)
possible_count
elsif possible_count.is_a?(String)
Integer(possible_count, 10)
endpoints.each do |endpoint|
reader = sql_client.do_get(endpoint.ticket, call_options)
reader.each do |record_batch|
odie "Empty record batch" if record_batch.data.size.zero? # rubocop:disable Style/ZeroLengthPredicate

record_batch.data.each do |record|
dimension = case category
when :homebrew_devcmdrun_developer
"devcmdrun=#{record["devcmdrun"]} HOMEBREW_DEVELOPER=#{record["developer"]}"
when :homebrew_os_arch_ci
if record["ci"] == "true"
"#{record["os"]} #{record["arch"]} (CI)"
else
"#{record["os"]} #{record["arch"]}"
end
when :homebrew_prefixes
if record["prefix"] == "custom-prefix"
"#{record["prefix"]} (#{record["os"]} #{record["arch"]})"
else
(record["prefix"]).to_s
end
when :os_versions
format_os_version_dimension(record["os_name_and_version"])
when :command_run_options
"#{record["command"]} #{record["options"]}"
when :test_bot_test
command_and_package, options = record["command"].split.partition { |arg| !arg.start_with?("-") }

# Cleanup bad data before https://github.com/Homebrew/homebrew-test-bot/pull/1043
# TODO: actually delete this from InfluxDB.
# Can delete this code after 27th April 2025.
next if %w[audit install linkage style test].exclude?(command_and_package.first)
next if command_and_package.last.include?("/")
next if options.include?("--tap=")
next if options.include?("--only-dependencies")
next if options.include?("--cached")

command_and_options = (command_and_package + options.sort).join(" ")
passed = (record["passed"] == "true") ? "PASSED" : "FAILED"

"#{command_and_options} (#{record["os"]} #{record["arch"]}) (#{passed})"
else
Integer(possible_count)
record[groups.first.to_s]
end
rescue ArgumentError, TypeError
nil
end

next if count <= 0
next if dimension.blank?

count
end
if (tap_name = record["tap_name"].presence) &&
((tap_name != "homebrew/cask" && dimension_key == :cask) ||
(tap_name != "homebrew/core" && dimension_key == :formula))
dimension = "#{tap_name}/#{dimension}"
end

# TODO: we don't seem to get a valid count for these categories, unclear why.
count ||= 1 if count_being_weird_categories.include?(category)
if (all_core_formulae_json || category == :build_error) &&
(options = record["options"].presence)
# homebrew/core formulae don't have non-HEAD options but they ended up in our analytics anyway.
if all_core_formulae_json
options = options.split.include?("--HEAD") ? "--HEAD" : ""
end
dimension = "#{dimension} #{options}"
end

odie "Invalid amount of items" if count.blank?
dimension = dimension.strip
next if dimension.match?(/[<>]/)

# Ignore values with a 0 count, means there are too few events to be useful.
next if count.zero?
count = record["count"]

json[:total_items] += 1
json[:total_count] += count
json[:total_items] += 1
json[:total_count] += count

json[:items] << {
number: nil,
dimension_key => dimension,
count:,
}
json[:items] << {
number: nil,
dimension_key => dimension,
count:,
}
end
end
end

# Combine identical values
Expand Down