diff --git a/instrumentation/base/Appraisals b/instrumentation/base/Appraisals new file mode 100644 index 0000000000..b9f4a41c3e --- /dev/null +++ b/instrumentation/base/Appraisals @@ -0,0 +1,6 @@ +appraise "base" do + remove_gem "opentelemetry-metrics-api" +end + +appraise "metrics-api" do +end diff --git a/instrumentation/base/Gemfile b/instrumentation/base/Gemfile index f649e2f64a..07d2b5728d 100644 --- a/instrumentation/base/Gemfile +++ b/instrumentation/base/Gemfile @@ -6,4 +6,6 @@ source 'https://rubygems.org' +gem 'opentelemetry-metrics-api' + gemspec diff --git a/instrumentation/base/lib/opentelemetry/instrumentation/base.rb b/instrumentation/base/lib/opentelemetry/instrumentation/base.rb index ddf8289551..8d0852666f 100644 --- a/instrumentation/base/lib/opentelemetry/instrumentation/base.rb +++ b/instrumentation/base/lib/opentelemetry/instrumentation/base.rb @@ -4,6 +4,11 @@ # # SPDX-License-Identifier: Apache-2.0 +begin + require 'opentelemetry-metrics-api' +rescue LoadError +end + module OpenTelemetry module Instrumentation # The Base class holds all metadata and configuration for an @@ -69,8 +74,9 @@ class << self integer: ->(v) { v.is_a?(Integer) }, string: ->(v) { v.is_a?(String) } }.freeze + SINGLETON_MUTEX = Thread::Mutex.new - private_constant :NAME_REGEX, :VALIDATORS + private_constant :NAME_REGEX, :VALIDATORS, :SINGLETON_MUTEX private :new @@ -163,13 +169,51 @@ def option(name, default:, validate:) end def instance - @instance ||= new(instrumentation_name, instrumentation_version, install_blk, - present_blk, compatible_blk, options) + @instance || SINGLETON_MUTEX.synchronize do + @instance ||= new(instrumentation_name, instrumentation_version, install_blk, + present_blk, compatible_blk, options, instrument_configs) + end + end + + if defined?(OpenTelemetry::Metrics) + %i[ + counter + observable_counter + histogram + gauge + observable_gauge + up_down_counter + observable_up_down_counter + ].each do |instrument_kind| + define_method(instrument_kind) do |name, **opts, &block| + opts[:callback] ||= block + register_instrument(instrument_kind, name, **opts) + end + end + + def register_instrument(kind, name, **opts) + @instrument_configs ||= {} + + key = [kind, name] + if @instrument_configs.key?(key) + warn("Duplicate instrument configured for #{self}: #{key.inspect}") + else + @instrument_configs[key] = opts + end + end + else + def counter(*, **); end + def observable_counter(*, **); end + def histogram(*, **); end + def gauge(*, **); end + def observable_gauge(*, **); end + def up_down_counter(*, **); end + def observable_up_down_counter(*, **); end end private - attr_reader :install_blk, :present_blk, :compatible_blk, :options + attr_reader :install_blk, :present_blk, :compatible_blk, :options, :instrument_configs def infer_name @inferred_name ||= if (md = name.match(NAME_REGEX)) # rubocop:disable Naming/MemoizedInstanceVariableName @@ -177,6 +221,10 @@ def infer_name end end + def metrics_defined? + defined?(OpenTelemetry::Metrics) + end + def infer_version return unless (inferred_name = infer_name) @@ -189,13 +237,13 @@ def infer_version end end - attr_reader :name, :version, :config, :installed, :tracer + attr_reader :name, :version, :config, :installed, :tracer, :meter, :instrument_configs alias installed? installed # rubocop:disable Metrics/ParameterLists def initialize(name, version, install_blk, present_blk, - compatible_blk, options) + compatible_blk, options, instrument_configs) @name = name @version = version @install_blk = install_blk @@ -204,7 +252,9 @@ def initialize(name, version, install_blk, present_blk, @config = {} @installed = false @options = options - @tracer = OpenTelemetry::Trace::Tracer.new + @tracer = OpenTelemetry::Trace::Tracer.new # default no-op tracer + @meter = OpenTelemetry::Metrics::Meter.new if defined?(OpenTelemetry::Metrics::Meter) # default no-op meter + @instrument_configs = instrument_configs || {} end # rubocop:enable Metrics/ParameterLists @@ -217,10 +267,21 @@ def install(config = {}) return true if installed? @config = config_options(config) + + @metrics_enabled = compute_metrics_enabled + + if metrics_defined? + @metrics_instruments = {} + @instrument_mutex = Mutex.new + end + return false unless installable?(config) - instance_exec(@config, &@install_blk) @tracer = OpenTelemetry.tracer_provider.tracer(name, version) + @meter = OpenTelemetry.meter_provider.meter(name, version: version) if metrics_enabled? + + instance_exec(@config, &@install_blk) + @installed = true end @@ -261,8 +322,76 @@ def enabled?(config = nil) true end + # This is based on a variety of factors, and should be invalidated when @config changes. + # It should be explicitly set in `initialize` for now. + def metrics_enabled? + !!@metrics_enabled + end + + # @api private + # ONLY yields if the meter is enabled. + def with_meter + yield @meter if metrics_enabled? + end + + if defined?(OpenTelemetry::Metrics) + %i[ + counter + observable_counter + histogram + gauge + observable_gauge + up_down_counter + observable_up_down_counter + ].each do |kind| + define_method(kind) do |name| + get_metrics_instrument(kind, name) + end + end + end + private + def metrics_defined? + defined?(OpenTelemetry::Metrics) + end + + def get_metrics_instrument(kind, name) + # FIXME: we should probably return *something* + # if metrics is not enabled, but if the api is undefined, + # it's unclear exactly what would be suitable. + # For now, there are no public methods that call this + # if metrics isn't defined. + return unless metrics_defined? + + @metrics_instruments.fetch([kind, name]) do |key| + @instrument_mutex.synchronize do + @metrics_instruments[key] ||= create_configured_instrument(kind, name) + end + end + end + + def create_configured_instrument(kind, name) + config = @instrument_configs[[kind, name]] + + # FIXME: what is appropriate here? + if config.nil? + Kernel.warn("unconfigured instrument requested: #{kind} of '#{name}'") + return + end + + # FIXME: some of these have different opts; + # should verify that they work before this point. + meter.public_send(:"create_#{kind}", name, **config) + end + + def compute_metrics_enabled + return false unless defined?(OpenTelemetry::Metrics) + return false if metrics_disabled_by_env_var? + + !!@config[:metrics] || metrics_enabled_by_env_var? + end + # The config_options method is responsible for validating that the user supplied # config hash is valid. # Unknown configuration keys are not included in the final config hash. @@ -317,13 +446,42 @@ def config_options(user_config) # will be OTEL_RUBY_INSTRUMENTATION_SINATRA_ENABLED. A value of 'false' will disable # the instrumentation, all other values will enable it. def enabled_by_env_var? + !disabled_by_env_var? + end + + def disabled_by_env_var? var_name = name.dup.tap do |n| n.upcase! n.gsub!('::', '_') n.gsub!('OPENTELEMETRY_', 'OTEL_RUBY_') n << '_ENABLED' end - ENV[var_name] != 'false' + ENV[var_name] == 'false' + end + + # Checks if this instrumentation's metrics are enabled by env var. + # This follows the conventions as outlined above, using `_METRICS_ENABLED` as a suffix. + # Unlike INSTRUMENTATION_*_ENABLED variables, these are explicitly opt-in (i.e. + # if the variable is unset, and `metrics: true` is not in the instrumentation's config, + # the metrics will not be enabled) + def metrics_enabled_by_env_var? + ENV.key?(metrics_env_var_name) && ENV[metrics_env_var_name] != 'false' + end + + def metrics_disabled_by_env_var? + ENV[metrics_env_var_name] == 'false' + end + + def metrics_env_var_name + @metrics_env_var_name ||= + begin + var_name = name.dup + var_name.upcase! + var_name.gsub!('::', '_') + var_name.gsub!('OPENTELEMETRY_', 'OTEL_RUBY_') + var_name << '_METRICS_ENABLED' + var_name + end end # Checks to see if the user has passed any environment variables that set options diff --git a/instrumentation/base/opentelemetry-instrumentation-base.gemspec b/instrumentation/base/opentelemetry-instrumentation-base.gemspec index 4939037352..7a5e8665eb 100644 --- a/instrumentation/base/opentelemetry-instrumentation-base.gemspec +++ b/instrumentation/base/opentelemetry-instrumentation-base.gemspec @@ -29,6 +29,7 @@ Gem::Specification.new do |spec| spec.add_dependency 'opentelemetry-common', '~> 0.21' spec.add_dependency 'opentelemetry-registry', '~> 0.1' + spec.add_development_dependency 'appraisal', '~> 2.5' spec.add_development_dependency 'bundler', '~> 2.4' spec.add_development_dependency 'minitest', '~> 5.0' spec.add_development_dependency 'opentelemetry-test-helpers', '~> 0.3' diff --git a/instrumentation/base/test/instrumentation/base_test.rb b/instrumentation/base/test/instrumentation/base_test.rb index c58bbe5003..a7fbc09d9d 100644 --- a/instrumentation/base/test/instrumentation/base_test.rb +++ b/instrumentation/base/test/instrumentation/base_test.rb @@ -53,15 +53,86 @@ def initialize(*args) end end + let(:instrumentation_with_metrics) do + Class.new(OpenTelemetry::Instrumentation::Base) do + instrumentation_name 'test_instrumentation' + instrumentation_version '0.1.1' + + option :metrics, default: false, validate: :boolean + + if defined?(OpenTelemetry::Metrics) + counter 'example.counter' + observable_counter 'example.observable_counter' + histogram 'example.histogram' + gauge 'example.gauge' + observable_gauge 'example.observable_gauge' + up_down_counter 'example.up_down_counter' + observable_up_down_counter 'example.observable_up_down_counter' + end + + def example_counter + counter 'example.counter' + end + + def example_observable_counter + observable_counter 'example.observable_counter' + end + + def example_histogram + histogram 'example.histogram' + end + + def example_gauge + gauge 'example.gauge' + end + + def example_observable_gauge + observable_gauge 'example.observable_gauge' + end + + def example_up_down_counter + up_down_counter 'example.up_down_counter' + end + + def example_observable_up_down_counter + observable_up_down_counter 'example.observable_up_down_counter' + end + end + end + it 'is auto-registered' do instance = instrumentation.instance _(OpenTelemetry::Instrumentation.registry.lookup('test_instrumentation')).must_equal(instance) end describe '.instance' do + let(:instrumentation) do + Class.new(OpenTelemetry::Instrumentation::Base) do + instrumentation_name 'test_instrumentation' + instrumentation_version '0.1.1' + + def initialize(*args) + # Simulate latency by hinting the VM should switch tasks + # (this can also be accomplished by something like `sleep(0.1)`). + # This replicates the worst-case scenario when using default assignment + # to obtain a singleton, i.e. that the scheduler switches threads between + # the nil check and object initialization. + Thread.pass + super + end + end + end + it 'returns an instance' do _(instrumentation.instance).must_be_instance_of(instrumentation) end + + it 'returns the same singleton instance to every thread' do + object_ids = Array.new(2).map { Thread.new { instrumentation.instance } } + .map { |thr| thr.join.value } + + _(object_ids.uniq.count).must_equal(1) + end end describe '.option' do @@ -442,6 +513,55 @@ def initialize(*args) end end + describe 'metrics' do + let(:config) { {} } + let(:instance) { instrumentation_with_metrics.instance } + + before do + instance.install(config) + end + + if defined?(OpenTelemetry::Metrics) + describe 'with the metrics api' do + it 'is disabled by default' do + _(instance.metrics_enabled?).must_equal false + end + + it 'returns a no-op counter' do + counter = instance.example_counter + _(counter).must_be_kind_of(OpenTelemetry::Metrics::Instrument::Counter) + end + + describe 'with the option enabled' do + let(:config) { { metrics: true } } + + it 'will be enabled' do + _(instance.metrics_enabled?).must_equal true + end + + it 'returns a no-op counter' do + counter = instance.example_counter + _(counter).must_be_kind_of(OpenTelemetry::Metrics::Instrument::Counter) + end + end + end + else + describe 'without the metrics api' do + it 'will not be enabled' do + _(instance.metrics_enabled?).must_equal false + end + + describe 'with the option enabled' do + let(:config) { { metrics: true } } + + it 'will not be enabled' do + _(instance.metrics_enabled?).must_equal false + end + end + end + end + end + def define_instrumentation_subclass(name, version = nil) names = name.split('::').map(&:to_sym) names.inject(Object) do |object, const| diff --git a/instrumentation/system-metrics/Appraisals b/instrumentation/system-metrics/Appraisals new file mode 100644 index 0000000000..be32fe2be8 --- /dev/null +++ b/instrumentation/system-metrics/Appraisals @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +appraise "metrics-sdk" do +end + +appraise "metrics-api" do + remove_gem "opentelemetry-metrics-sdk" +end + +appraise "base" do + remove_gem "opentelemetry-metrics-sdk" + remove_gem "opentelemetry-metrics-api" +end diff --git a/instrumentation/system-metrics/Gemfile b/instrumentation/system-metrics/Gemfile new file mode 100644 index 0000000000..5f35ae4f9d --- /dev/null +++ b/instrumentation/system-metrics/Gemfile @@ -0,0 +1,25 @@ +# Generated from /Users/zach/workspace/opentelemetry-ruby-contrib/instrumentation/system-metrics/opentelemetry-instrumentation-system-metrics.gemspec +source 'https://rubygems.org' + +gem 'opentelemetry-instrumentation-base', path: '../base' + +# FIXME: the metrics-api is behind the metrics-sdk gem for some reason; bundle from git for now +OTEL_RUBY_GEM = lambda do |short_name| + short_name = short_name.split(/-|_/) + long_name = ['opentelemetry', *short_name].join('-') + + gem long_name, + git: 'https://www.github.com/zvkemp/opentelemetry-ruby', + glob: "#{short_name.join('_')}/*.gemspec", + ref: '9b46ceb3' +end + +OTEL_RUBY_GEM['metrics-api'] +OTEL_RUBY_GEM['metrics-sdk'] + +# gem 'opentelemetry-metrics-api', path: '/Users/zach/workspace/opentelemetry-ruby/metrics_api' +# gem 'opentelemetry-metrics-sdk', path: '/Users/zach/workspace/opentelemetry-ruby/metrics_sdk' + +gem 'pry-byebug' + +gemspec diff --git a/instrumentation/system-metrics/Rakefile b/instrumentation/system-metrics/Rakefile new file mode 100644 index 0000000000..987b46341e --- /dev/null +++ b/instrumentation/system-metrics/Rakefile @@ -0,0 +1,23 @@ +require 'bundler/gem_tasks' +require 'rake/testtask' +require 'yard' +require 'rubocop/rake_task' + +RuboCop::RakeTask.new + +Rake::TestTask.new :test do |t| + t.libs << 'test' + t.libs << 'lib' + t.test_files = FileList['test/**/*_test.rb'] + t.warning = false +end + +YARD::Rake::YardocTask.new do |t| + t.stats_options = ['--list-undoc'] +end + +if RUBY_ENGINE == 'truffleruby' + task default: %i[test] +else + task default: %i[test rubocop yard] +end diff --git a/instrumentation/system-metrics/lib/opentelemetry-instrumentation-system.rb b/instrumentation/system-metrics/lib/opentelemetry-instrumentation-system.rb new file mode 100644 index 0000000000..c034f140f8 --- /dev/null +++ b/instrumentation/system-metrics/lib/opentelemetry-instrumentation-system.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require_relative 'opentelemetry/instrumentation' diff --git a/instrumentation/system-metrics/lib/opentelemetry/instrumentation.rb b/instrumentation/system-metrics/lib/opentelemetry/instrumentation.rb new file mode 100644 index 0000000000..feb11d9d46 --- /dev/null +++ b/instrumentation/system-metrics/lib/opentelemetry/instrumentation.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +# OpenTelemetry is an open source observability framework, providing a +# general-purpose API, SDK, and related tools required for the instrumentation +# of cloud-native software, frameworks, and libraries. +# +# The OpenTelemetry module provides global accessors for telemetry objects. +# See the documentation for the `opentelemetry-api` gem for details. +module OpenTelemetry + # "Instrumentation" are specified by + # https://github.com/open-telemetry/opentelemetry-specification/blob/784635d01d8690c8f5fcd1f55bdbc8a13cf2f4f2/specification/glossary.md#instrumentation-library + # + # Instrumentation should be able to handle the case when the library is not installed on a user's system. + module Instrumentation + end +end + +require_relative 'instrumentation/system' diff --git a/instrumentation/system-metrics/lib/opentelemetry/instrumentation/system.rb b/instrumentation/system-metrics/lib/opentelemetry/instrumentation/system.rb new file mode 100644 index 0000000000..52f9607745 --- /dev/null +++ b/instrumentation/system-metrics/lib/opentelemetry/instrumentation/system.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'opentelemetry' +require 'opentelemetry-instrumentation-base' + +module OpenTelemetry + module Instrumentation + # (see {OpenTelemetry::Instrumentation::System::Instrumentation}) + module System + end + end +end + +require_relative 'system/version' +require_relative 'system/platform' +require_relative 'system/instrumentation' +require 'opentelemetry/common' diff --git a/instrumentation/system-metrics/lib/opentelemetry/instrumentation/system/instrumentation.rb b/instrumentation/system-metrics/lib/opentelemetry/instrumentation/system/instrumentation.rb new file mode 100644 index 0000000000..92de60b43a --- /dev/null +++ b/instrumentation/system-metrics/lib/opentelemetry/instrumentation/system/instrumentation.rb @@ -0,0 +1,184 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +module OpenTelemetry + module Instrumentation + module System + class Instrumentation < Base + option :process_metrics, default: true, validate: :boolean + option :system_metrics, default: false, validate: :boolean + # an option called `metrics` must be set in order to use the SDK meter + option :metrics, default: true, validate: :boolean + + compatible do + # FIXME: implement this + true + end + + if defined?(OpenTelemetry::Metrics) + # https://opentelemetry.io/docs/specs/semconv/system/process-metrics/#metric-processcputime + observable_counter('process.cpu.time', unit: 's') do |obs| + instance.maybe_observe(obs, 'cpu.mode' => 'user', &:cpu_time_user) + instance.maybe_observe(obs, 'cpu.mode' => 'system', &:cpu_time_system) + end + + # https://opentelemetry.io/docs/specs/semconv/system/process-metrics/#metric-processcpuutilization + # FIXME: what's up with this unit + observable_gauge('process.cpu.utilization', unit: '1', disabled: true) do + # FIXME: attr { "cpu.mode": ['user', 'system', ...] } + # FIXME: impl + end + + # https://opentelemetry.io/docs/specs/semconv/system/process-metrics/#metric-processmemoryusage + observable_up_down_counter('process.memory.usage', unit: 'By') do |obs| + instance.maybe_observe(obs, &:process_memory_usage) + end + + # https://opentelemetry.io/docs/specs/semconv/system/process-metrics/#metric-processmemoryvirtual + observable_up_down_counter('process.memory.virtual', unit: 'By') do |obs| + instance.maybe_observe(obs, &:process_memory_virtual) + end + + # https://opentelemetry.io/docs/specs/semconv/system/process-metrics/#metric-processdiskio + observable_counter('process.disk.io', unit: 'By', disabled: true) do + # FIXME: implement me - unclear how to proceed on this one. + # System-level metric would make more sense + end + + # https://opentelemetry.io/docs/specs/semconv/system/process-metrics/#metric-processnetworkio + observable_counter('process.network.io', unit: 'By', disabled: true) do + # FIXME: implement me - unclear how to proceed on this one. + # System-level metric would make more sense + end + + # https://opentelemetry.io/docs/specs/semconv/system/process-metrics/#metric-processthreadcount + observable_up_down_counter('process.thread.count') do |obs| + # FIXME: should these be green threads or OS threads? + obs.observe(Thread.list.size) + end + + # https://opentelemetry.io/docs/specs/semconv/system/process-metrics/#metric-processcontext_switches + observable_counter('process.context_switches') do |obs| + instance.maybe_observe(obs, 'process.context_switch_type' => 'voluntary', &:voluntary_context_switches) + instance.maybe_observe(obs, 'process.context_switch_type' => 'involuntary', &:involuntary_context_switches) + end + + # https://opentelemetry.io/docs/specs/semconv/system/process-metrics/#metric-processopen_file_descriptorcount + observable_up_down_counter('process.open_file_descriptor.count') do |obs| + # TODO: may not be the most efficient way, but it should be correct + obs.observe(ObjectSpace.each_object(IO).count { |x| !x.closed? }) + end + + # https://opentelemetry.io/docs/specs/semconv/system/process-metrics/#metric-processpagingfaults + observable_counter('process.paging.faults') do |obs| + instance.maybe_observe(obs, 'process.paging.fault_type' => 'major', &:page_faults_major) + instance.maybe_observe(obs, 'process.paging.fault_type' => 'minor', &:page_faults_minor) + end + + # https://opentelemetry.io/docs/specs/semconv/system/process-metrics/#metric-processuptime + observable_gauge('process.uptime', unit: 's') do |obs| + instance.maybe_observe(obs, &:process_uptime) + end + + # FIXME: upstream to semconv + observable_counter('process.runtime.gc_count') { |obs| obs.observe(GC.count) } + end + + install do |config| + load_platform + start_asynchronous_instruments(config) + end + + present do + defined?(OpenTelemetry::Metrics) && platform_supported? + end + + attr_reader :platform + + def maybe_observe(observations, attributes = {}) + if (value = yield(self)) + observations.observe(value, attributes) + end + end + + def cpu_time_system + current_data.cpu_time_system + end + + def cpu_time_user + current_data.cpu_time_user + end + + def process_memory_usage + current_data.process_memory_usage + end + + def process_memory_virtual + current_data.process_memory_virtual + end + + def voluntary_context_switches + current_data.voluntary_context_switches + end + + def involuntary_context_switches + current_data.involuntary_context_switches + end + + def page_faults_minor + current_data.page_faults_minor + end + + def page_faults_major + current_data.page_faults_major + end + + def process_uptime + current_data.process_uptime + end + + private + + def current_data + platform.fetch_current + end + + def start_asynchronous_instruments(config) + return unless config[:metrics] + + start_namespaced_instruments('process.') if config[:process_metrics] + start_namespaced_instruments('system.') if config[:system_metrics] + end + + def start_namespaced_instruments(namespace) + @instrument_configs.each do |(type, name), instrument_config| + next unless name.start_with?(namespace) + + # NOTE: this key exists on the config to allow for semconv-defined + # instruments that are unimplemented here. + next if instrument_config[:disabled] + next unless configured?(name) + + # instantiate the async instrument + public_send(type, name) + end + end + + def configured?(metric_name) + true + end + + def load_platform + @platform = Platform.impl&.new + end + + def platform_supported? + !Platform.impl.nil? + end + end + end + end +end diff --git a/instrumentation/system-metrics/lib/opentelemetry/instrumentation/system/platform.rb b/instrumentation/system-metrics/lib/opentelemetry/instrumentation/system/platform.rb new file mode 100644 index 0000000000..c49fdb529c --- /dev/null +++ b/instrumentation/system-metrics/lib/opentelemetry/instrumentation/system/platform.rb @@ -0,0 +1,468 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +module OpenTelemetry + module Instrumentation + module System + module Platform + def self.impl + case RbConfig::CONFIG['host_os'] + when /darwin/ + Darwin::Platform + when /linux/ + Linux::Platform + end + end + + class AbstractPlatform + attr_reader :data_source + + def fetch_current + data_source.fetch_current + end + end + + class AbstractDataSource + TTL = 15 # FIXME: sensible default? + + attr_reader :cache + + def initialize(ttl: TTL) + @ttl = ttl + @cache = {} + @mutex = Mutex.new + end + + def fetch(pid) + @mutex.synchronize do + last_fetched_at, data = cache.fetch(pid) do + return refresh(pid) + end + + return data if last_fetched_at + @ttl > Time.now.to_i + + refresh(pid) + end + end + + def fetch_current + fetch(Process.pid) + end + + private + + def refresh(pid) + data = parse_ps(pid) + cache[pid] = [Time.now.to_i, data] + data + end + + def parse_ps(pid) + data_class.parse(fetch_raw_data(pid)) + end + + def data_class + PSData + end + + def fetch_raw_data(_pid) + '' + end + end + + class AbstractData + def self.parse(_raw) + new({}) + end + + def initialize(parsed) + @data = parsed + end + + attr_reader :data + + def cpu_time_user; end + def cpu_time_system; end + def process_memory_usage; end + def process_memory_virtual; end + def voluntary_context_switches; end + def involuntary_context_switches; end + def page_faults_major; end + def page_faults_minor; end + def process_uptime; end + end + + module Darwin + class Platform < AbstractPlatform + def initialize + @data_source = PSDataSource.new + super + end + end + + class PSDataSource < AbstractDataSource + private + + def data_class + PSData + end + + def fetch_raw_data(pid) + `ps -p #{pid} -O utime,time,rss,vsz,nvcsw,nivcsw,majflt,etime` + end + end + + class PSData < AbstractData + def self.parse(raw) + output = raw.lines + header = output.shift.strip.split(/\s+/) + new(header.zip(output.first.strip.split(/\s+/)).reject { |_, v| v == '-' }.to_h) + end + + def cpu_time_user + return unless (raw_utime = data['UTIME']) + + parse_ps_time(raw_utime).to_i + end + + def cpu_time_system + return unless (utime = cpu_time_user) && (raw_time = data['TIME']) + + parse_ps_time(raw_time).to_i - utime + end + + def process_memory_usage + data['RSS']&.to_i + end + + def process_memory_virtual + data['VSZ']&.to_i + end + + # FIXME: on at least one macos version/architecture, these are all blank + def voluntary_context_switches + data['NVCSW'] + end + + # FIXME: on at least one macos version/architecture, these are all blank + def involuntary_context_switches + data['NIVCSW'] + end + + # FIXME: on at least one macos version/architecture, these are all blank + def page_faults_major + data['MAJFLT'] + end + + # Instrumentations SHOULD use a gauge with type double and measure uptime in seconds as a + # floating point number with the highest precision available + def process_uptime + return unless (raw_etime = data['ELAPSED']) # aka 'etime' + + parse_ps_time(raw_etime) + end + + private + + def parse_ps_time(str) + time, days = str.split('-', 2).reverse + days = days ? days.to_i : 0 + + seconds, minutes, hours = time.split(':', 3).reverse + + ( + (Integer(days || 0) * 86_400) \ + + (Integer(hours || 0) * 3600) \ + + (Integer(minutes || 0) * 60) \ + + Float(seconds) + ) + end + end + end + + module Linux + def self.clock_tick + # should be safe not to mutex this + @clock_tick ||= begin + require 'etc' + Etc.sysconf(Etc::SC_CLK_TCK) + end + end + + class Platform < AbstractPlatform + def initialize + @data_source = ProcCompoundDataSource.new + end + end + + class ProcStatData < AbstractData + # https://man7.org/linux/man-pages/man5/proc_pid_stat.5.html + + FIELDS = %w[ + pid + comm + state + ppid + pgrp + session + tty_nr + tpgid + flags + minflt + cminflt + majflt + cmajflt + utime + stime + cutime + cstime + priority + nice + num_threads + itrealvalue + starttime + vsize + rss + rsslim + startcode + endcode + startstack + kstkesp + kstkeip + signal + blocked + sigignore + sigcatch + wchan + nswap + cnswap + exit_signal + processor + rt_priority + policy + delayacct_blkio_ticks + guest_time + cguest_time + start_data + end_data + start_brk + arg_start + arg_end + env_start + env_end + exit_code + ].freeze + + def self.parse(raw) + # process name is always in brackets, but may contain brackets itself. + # There won't be brackets to the right of the last enclosing bracket. + comm_start = raw.index('(') + comm_end = raw.rindex(')') + + pid = raw[0...comm_start].strip + comm = raw[comm_start..comm_end] + rest = raw[(comm_end + 1)..-1].strip.split(/\s+/) + + new(FIELDS.zip([pid, comm, *rest]).to_h) + end + + def cpu_time_user + utime_clock_ticks&./(Linux.clock_tick) + end + + def cpu_time_system + stime_clock_ticks&./(Linux.clock_tick) + end + + def process_memory_usage + # NOTE: rss is known to be inaccurate + # Some of these values are inaccurate because of a kernel- + # internal scalability optimization. If accurate values are + # required, use /proc/pid/smaps or /proc/pid/smaps_rollup + # instead, which are much slower but provide accurate, + # detailed information. + data['rss']&.to_i + end + + def process_memory_virtual + data['vsize']&.to_i + end + + def page_faults_major + data['majflt']&.to_i + end + + def page_faults_minor + data['minflt']&.to_i + end + + if defined?(Process::CLOCK_BOOTTIME) + def process_uptime + return unless (ticks = start_time_ticks) + + # FIXME: does Process::CLOCK_BOOTTIME need to be cached on this snapshot? + + (ticks.to_f / Linux.clock_tick) - Process.clock_gettime(Process::CLOCK_BOOTTIME) + end + else + def process_uptime + # In practice this should never be called, except perhaps in tests running on non-linux platforms + end + end + + private + + def utime_clock_ticks + data['utime']&.to_i + end + + def stime_clock_ticks + data['stime']&.to_i + end + + def start_time_ticks + # The time the process started after system boot. + data['starttime']&.to_i + end + end + + class ProcStatDataSouce < AbstractDataSource + private + + def data_class + ProcStatData + end + + def fetch_raw_data(pid) + File.read("/proc/#{pid}/stat") + end + end + + class ProcStatusData < AbstractData + def self.parse(raw) + data = {} + raw.lines.each do |line| + key, value = line.strip.split(":\t") + + next unless key && value + + data[key] = value + end + + new(data) + end + + def cpu_time_user; end + def cpu_time_system; end + def process_memory_usage; end + def process_memory_virtual; end + + def voluntary_context_switches + data['voluntary_ctxt_switches']&.to_i + end + + def involuntary_context_switches + data['nonvoluntary_ctxt_switches']&.to_i + end + + def page_faults_major; end + def page_faults_minor; end + def process_uptime; end + end + + class ProcStatusDataSource < AbstractDataSource + private + + def data_class + ProcStatusData + end + + def fetch_raw_data(pid) + File.read("/proc/#{pid}/status") + end + end + + # FIXME: this might be overkill; stat is a better source for most of this data + # but context switches aren't listed there. + class ProcCompoundData < AbstractData + def self.parse(raw) + status = ProcStatusData.parse(raw[:status]) if raw[:status] + + stat = ProcStatData.parse(raw[:stat]) if raw[:stat] + + new(stat: stat, status: status) + end + + def initialize(data) + super(nil) + @stat = data[:stat] + @status = data[:status] + end + + attr_reader :stat, :status + + def cpu_time_user + stat&.cpu_time_user + end + + def cpu_time_system + stat&.cpu_time_system + end + + def process_memory_usage + stat&.process_memory_usage + end + + def process_memory_virtual + stat&.process_memory_virtual + end + + def voluntary_context_switches + status&.voluntary_context_switches + end + + def involuntary_context_switches + status&.involuntary_context_switches + end + + def page_faults_major + stat&.page_faults_major + end + + def page_faults_minor + stat&.page_faults_minor + end + + def process_uptime + stat&.process_uptime + end + end + + class ProcCompoundDataSource < AbstractDataSource + def initialize + super + @proc_status_data_source = ProcStatusDataSource.new + @proc_stat_data_source = ProcStatDataSource.new + end + + private + + def data_class + ProcCompoundData + end + + def fetch_raw_data(pid) + { + status: @proc_status_data_source.send(:fetch_raw_data, pid), + stat: @proc_stat_data_source.send(:fetch_raw_data, pid) + } + end + end + end + end + end + end +end diff --git a/instrumentation/system-metrics/lib/opentelemetry/instrumentation/system/version.rb b/instrumentation/system-metrics/lib/opentelemetry/instrumentation/system/version.rb new file mode 100644 index 0000000000..e1d02feaa1 --- /dev/null +++ b/instrumentation/system-metrics/lib/opentelemetry/instrumentation/system/version.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +module OpenTelemetry + module Instrumentation + module System + VERSION = '0.0.1' + end + end +end diff --git a/instrumentation/system-metrics/opentelemetry-instrumentation-system-metrics.gemspec b/instrumentation/system-metrics/opentelemetry-instrumentation-system-metrics.gemspec new file mode 100644 index 0000000000..d1851b5ba3 --- /dev/null +++ b/instrumentation/system-metrics/opentelemetry-instrumentation-system-metrics.gemspec @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +lib = File.expand_path('lib', __dir__) +$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) +require 'opentelemetry/instrumentation/system/version' + +Gem::Specification.new do |spec| + spec.name = 'opentelemetry-instrumentation-system-metrics' + spec.version = OpenTelemetry::Instrumentation::System::VERSION + spec.authors = ['OpenTelemetry Authors'] + spec.email = ['cncf-opentelemetry-contributors@lists.cncf.io'] + + spec.summary = 'System and process metrics for the OpenTelemetry framework' + spec.description = 'System and process metrics for the OpenTelemetry framework' + spec.homepage = 'https://github.com/open-telemetry/opentelemetry-ruby-contrib' + spec.license = 'Apache-2.0' + + spec.files = Dir.glob('lib/**/*.rb') + + Dir.glob('*.md') + + ['LICENSE', '.yardopts'] + spec.require_paths = ['lib'] + spec.required_ruby_version = '>= 3.0' + + spec.add_dependency 'opentelemetry-api', '~> 1.0' + spec.add_dependency 'opentelemetry-instrumentation-base', '~> 0.22.1' + + spec.add_development_dependency 'appraisal', '~> 2.5' + spec.add_development_dependency 'bundler', '~> 2.4' + spec.add_development_dependency 'minitest', '~> 5.0' + spec.add_development_dependency 'minitest-reporters', '~> 1.0' + spec.add_development_dependency 'opentelemetry-sdk', '~> 1.1' + spec.add_development_dependency 'opentelemetry-test-helpers', '~> 0.3' + spec.add_development_dependency 'rspec-mocks' + spec.add_development_dependency 'rubocop', '~> 1.69.1' + spec.add_development_dependency 'rubocop-performance', '~> 1.23.0' + spec.add_development_dependency 'simplecov', '~> 0.17.1' + spec.add_development_dependency 'yard', '~> 0.9' + + if spec.respond_to?(:metadata) + spec.metadata['changelog_uri'] = "https://rubydoc.info/gems/#{spec.name}/#{spec.version}/file/CHANGELOG.md" + spec.metadata['source_code_uri'] = 'https://github.com/open-telemetry/opentelemetry-ruby-contrib/tree/main/instrumentation/system-metrics' + spec.metadata['bug_tracker_uri'] = 'https://github.com/open-telemetry/opentelemetry-ruby-contrib/issues' + spec.metadata['documentation_uri'] = "https://rubydoc.info/gems/#{spec.name}/#{spec.version}" + end +end diff --git a/instrumentation/system-metrics/test/opentelemetry/instrumentation/system/instrumentation_test.rb b/instrumentation/system-metrics/test/opentelemetry/instrumentation/system/instrumentation_test.rb new file mode 100644 index 0000000000..6fc5c221f6 --- /dev/null +++ b/instrumentation/system-metrics/test/opentelemetry/instrumentation/system/instrumentation_test.rb @@ -0,0 +1,112 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'test_helper' +require_relative '../../../../lib/opentelemetry/instrumentation/system' + +describe OpenTelemetry::Instrumentation::System::Instrumentation do + let(:instrumentation) { OpenTelemetry::Instrumentation::System::Instrumentation.instance } + let(:config) { {} } + + it 'has #name' do + _(instrumentation.name).must_equal 'OpenTelemetry::Instrumentation::System' + end + + before do + with_metrics_sdk do + reset_meter_provider + METRICS_EXPORTER.reset + end + + instrumentation.install(config) + end + + after do + instrumentation.instance_variable_set(:@installed, false) + end + + without_metrics_api do + describe 'without metrics api' do + it 'works' do + _(true).must_equal true + end + end + end + + without_metrics_sdk do + describe 'without metrics sdk' do + it 'works' do + _(true).must_equal true + end + end + end + + with_metrics_sdk do + describe 'with metrics sdk' do + it 'works' do + GC.start + + METRICS_EXPORTER.pull + snapshots = METRICS_EXPORTER.metric_snapshots.group_by(&:name) + + gc_count = snapshots.delete('process.runtime.gc_count') + _(gc_count.length).must_equal(1) + _(gc_count[0].data_points.length).must_equal(1) + _(gc_count[0].data_points[0].value).must_be :>, 0 + + thread_count = snapshots.delete('process.thread.count') + _(thread_count.length).must_equal(1) + _(thread_count[0].data_points.length).must_equal(1) + _(thread_count[0].data_points[0].value).must_be :>, 0 + + fd_count = snapshots.delete('process.open_file_descriptor.count') + _(fd_count.length).must_equal(1) + _(fd_count[0].data_points.length).must_equal(1) + _(fd_count[0].data_points[0].value).must_be :>, 0 + + cpu_time = snapshots.delete('process.cpu.time') + _(cpu_time.length).must_equal(1) + _(cpu_time[0].data_points.length).must_equal(2) + _(cpu_time[0].data_points[0].value).must_be :>=, 0 + _(cpu_time[0].data_points[1].value).must_be :>=, 0 + _(cpu_time[0].data_points.map { |d| d.attributes['cpu.mode'] }.sort).must_equal(%w[system user]) + + memory_usage = snapshots.delete('process.memory.usage') + _(memory_usage.length).must_equal(1) + _(memory_usage[0].data_points.length).must_equal(1) + _(memory_usage[0].data_points[0].value).must_be :>, 0 + + memory_virtual = snapshots.delete('process.memory.virtual') + _(memory_virtual.length).must_equal(1) + _(memory_virtual[0].data_points.length).must_equal(1) + _(memory_virtual[0].data_points[0].value).must_be :>, 0 + + context_switches = snapshots.delete('process.context_switches') + _(context_switches.length).must_equal(1) + + if linux? + _(context_switches[0].data_points.length).must_equal(2) + # _(context_switches[0].data_points[0].value).must_be :>, 0 + end + + paging_faults = snapshots.delete('process.paging.faults') + _(paging_faults.length).must_equal(1) + + if linux? + _(paging_faults[0].data_points.length).must_equal(2) + # _(paging_faults[0].data_points[0].value).must_be :>, 0 + end + + process_uptime = snapshots.delete('process.uptime') + _(process_uptime.length).must_equal(1) + _(process_uptime[0].data_points.length).must_equal(1) + _(process_uptime[0].data_points[0].value).must_be :>, 0 + + _(snapshots.keys).must_be_empty # exhaustive + end + end + end +end diff --git a/instrumentation/system-metrics/test/opentelemetry/instrumentation/system/platform_test.rb b/instrumentation/system-metrics/test/opentelemetry/instrumentation/system/platform_test.rb new file mode 100644 index 0000000000..d90ebad101 --- /dev/null +++ b/instrumentation/system-metrics/test/opentelemetry/instrumentation/system/platform_test.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'test_helper' +require_relative '../../../../lib/opentelemetry/instrumentation/system/platform' + +describe OpenTelemetry::Instrumentation::System::Platform do + let(:proc_status_raw) do + %( + Name:\tirb\nUmask:\t0022\nState:\tR (running)\nTgid:\t14\nNgid:\t0\nPid:\t14\nPPid:\t1\nTracerPid:\t0\nUid:\t0\t0\t0\t0\nGid:\t0\t0\t0\t0\nFDSize:\t256\nGroups:\t0 \nNStgid:\t14\nNSpid:\t14\nNSpgid:\t14\nNSsid:\t1\nKthread:\t0\nVmPeak:\t 480820 kB\nVmSize:\t 480756 kB\nVmLck:\t 0 kB\nVmPin:\t 0 kB\nVmHWM:\t 21020 kB\nVmRSS:\t 21020 kB\nRssAnon:\t 14208 kB\nRssFile:\t 6812 kB\nRssShmem:\t 0 kB\nVmData:\t 460748 kB\nVmStk:\t 8188 kB\nVmExe:\t 4 kB\nVmLib:\t 9356 kB\nVmPTE:\t 156 kB\nVmSwap:\t 0 kB\nHugetlbPages:\t 0 kB\nCoreDumping:\t0\nTHP_enabled:\t0\nuntag_mask:\t0xffffffffffffff\nThreads:\t2\nSigQ:\t0/31324\nSigPnd:\t0000000000000000\nShdPnd:\t0000000000000000\nSigBlk:\t0000000000000000\nSigIgn:\t0000000000000000\nSigCgt:\t0000000142017e4f\nCapInh:\t0000000000000000\nCapPrm:\t00000000a80425fb\nCapEff:\t00000000a80425fb\nCapBnd:\t00000000a80425fb\nCapAmb:\t0000000000000000\nNoNewPrivs:\t0\nSeccomp:\t0\nSeccomp_filters:\t0\nSpeculation_Store_Bypass:\tthread vulnerable\nSpeculationIndirectBranch:\tunknown\nCpus_allowed:\t3ff\nCpus_allowed_list:\t0-9\nMems_allowed:\t1\nMems_allowed_list:\t0\nvoluntary_ctxt_switches:\t119345\nnonvoluntary_ctxt_switches:\t7\n + ) + end + + let(:proc_stat_raw) do + "14 (irb) R 1 14 1 34816 14 4194560 3832 0 0 0 2657 1499 0 0 20 0 2 0 22878 492294144 5255 18446744073709551615 187650009333760 187650009337256 281474020148384 0 0 0 0 0 1107394127 0 0 0 17 8 0 0 0 0 0 187650009464144 187650009464936 187650643861504 281474020150775 281474020150814 281474020150814 281474020151269 0\n" + end + describe OpenTelemetry::Instrumentation::System::Platform::Linux::ProcStatusData do + let(:parsed) do + OpenTelemetry::Instrumentation::System::Platform::Linux::ProcStatusData.parse(proc_status_raw) + end + + it 'works' do + parsed + end + end + + describe OpenTelemetry::Instrumentation::System::Platform::Linux::ProcStatData do + let(:raw) do + end + + let(:parsed) do + OpenTelemetry::Instrumentation::System::Platform::Linux::ProcStatData.parse(proc_stat_raw) + end + + it 'works' do + parsed + end + end + + describe OpenTelemetry::Instrumentation::System::Platform::Linux::ProcCompoundData do + let(:parsed) { OpenTelemetry::Instrumentation::System::Platform::Linux::ProcCompoundData.parse(stat: proc_stat_raw, status: proc_status_raw) } + + it 'works' do + parsed + _(parsed.cpu_time_user).must_equal(26) + _(parsed.cpu_time_system).must_equal(14) + _(parsed.process_memory_usage).must_equal(5255) + _(parsed.process_memory_virtual).must_equal(492_294_144) + _(parsed.voluntary_context_switches).must_equal(119_345) + _(parsed.involuntary_context_switches).must_equal(7) + _(parsed.page_faults_major).must_equal(0) + _(parsed.page_faults_minor).must_equal(3832) + + # _(parsed.process_uptime).must_equal(0) if defined?(Process::CLOCK_BOOTTIME) + end + end +end diff --git a/instrumentation/system-metrics/test/test_helper.rb b/instrumentation/system-metrics/test/test_helper.rb new file mode 100644 index 0000000000..0b77b3e309 --- /dev/null +++ b/instrumentation/system-metrics/test/test_helper.rb @@ -0,0 +1,100 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'bundler/setup' +Bundler.require(:default, :development, :test) + +require 'minitest/autorun' +require 'minitest/reporters' +require 'rspec/mocks/minitest_integration' + +Minitest::Reporters.use! Minitest::Reporters::SpecReporter.new + +EXPORTER = OpenTelemetry::SDK::Trace::Export::InMemorySpanExporter.new +span_processor = OpenTelemetry::SDK::Trace::Export::SimpleSpanProcessor.new(EXPORTER) + +OpenTelemetry::SDK.configure do |c| + c.error_handler = ->(exception:, message:) { raise(exception || message) } + c.add_span_processor span_processor +end + +module LoadedMetricsFeatures + OTEL_METRICS_API_LOADED = !Gem.loaded_specs['opentelemetry-metrics-api'].nil? + OTEL_METRICS_SDK_LOADED = !Gem.loaded_specs['opentelemetry-metrics-sdk'].nil? + + extend self + + def api_loaded? + OTEL_METRICS_API_LOADED + end + + def sdk_loaded? + OTEL_METRICS_SDK_LOADED + end +end + +# NOTE: this isn't currently used, but it may be useful to fully reset state between tests +def reset_meter_provider + return unless LoadedMetricsFeatures.sdk_loaded? + + reset_metrics_exporter + resource = OpenTelemetry.meter_provider.resource + OpenTelemetry.meter_provider = OpenTelemetry::SDK::Metrics::MeterProvider.new(resource: resource) + OpenTelemetry.meter_provider.add_metric_reader(METRICS_EXPORTER) +end + +def reset_metrics_exporter + return unless LoadedMetricsFeatures.sdk_loaded? + + METRICS_EXPORTER.instance_exec do + @metric_snapshots.clear + @metric_store.instance_exec { @metric_streams.clear } + end +end + +if LoadedMetricsFeatures.sdk_loaded? + METRICS_EXPORTER = OpenTelemetry::SDK::Metrics::Export::InMemoryMetricPullExporter.new + OpenTelemetry.meter_provider.add_metric_reader(METRICS_EXPORTER) +end + +module ConditionalEvaluation + def self.included(base) + base.extend(self) + end + + def self.prepended(base) + base.extend(self) + end + + def with_metrics_sdk + yield if LoadedMetricsFeatures.sdk_loaded? + end + + def without_metrics_sdk + yield unless LoadedMetricsFeatures.sdk_loaded? + end + + def without_metrics_api + yield unless LoadedMetricsFeatures.api_loaded? + end + + def it(desc = 'anonymous', with_metrics_sdk: false, without_metrics_sdk: false, &block) + return super(desc, &block) unless with_metrics_sdk || without_metrics_sdk + + raise ArgumentError, 'without_metrics_sdk and with_metrics_sdk must be mutually exclusive' if without_metrics_sdk && with_metrics_sdk + + return if with_metrics_sdk && !LoadedMetricsFeatures.sdk_loaded? + return if without_metrics_sdk && LoadedMetricsFeatures.sdk_loaded? + + super(desc, &block) + end +end + +def linux? + RbConfig::CONFIG['host_os'].include?('linux') +end + +Minitest::Spec.prepend(ConditionalEvaluation)