Files
picopackage/lib/picopackage/provider.rb
2025-01-27 16:13:56 +11:00

129 lines
3.7 KiB
Ruby

require "json"
require "net/http"
require "time"
require "uri"
module Picopackage
# Factory that selects the provider class responsible for a URL.
class Provider
  # Consults each entry of PROVIDERS in order and returns an instance of the
  # first one that accepts `url`.
  #
  # A provider class answers `handles_url?` with:
  # * true   - it is instantiated and returned immediately;
  # * :maybe - it is instantiated and returned only if `handles_body?` is truthy;
  # * anything else - it is skipped.
  #
  # Returns nil when no provider accepts the URL.
  def self.for(url)
    PROVIDERS.each do |candidate|
      verdict = candidate.handles_url?(url)
      return candidate.new(url) if verdict.equal?(true)
      next unless verdict.equal?(:maybe)

      probe = candidate.new(url)
      return probe if probe.handles_body?
    end

    nil
  end
end
# Base class for fetching content from a URL
# The variable `body` will contain the package_data retrieved from the URL
# The variable `package_data` will contain both the payload and the metadata - this is what would be written to a file.
# The variable `payload` will contain the payload extracted from `package_data`
# The variable `metadata` will contain the metadata extracted from `package_data`
# Job of the Provider class is to fetch the body from the URL, and then extract the package_data
# and the filename from the body. The SourceFile class will then take the body and split it into payload and metadata
# Generic provider: downloads the response body of a URL over HTTP(S) and uses
# it unchanged as the package content. Subclasses override `transform_url`,
# `content`, `filename` and `packaged_at` for hosts that wrap the content.
class DefaultProvider
  MAX_SIZE = 1024 * 1024 # largest response body accepted, in bytes
  TIMEOUT = 10 # open and read timeout handed to Net::HTTP (seconds)
  attr_reader :url

  # Every URL is a candidate; Provider.for then asks `handles_body?`.
  def self.handles_url?(url) = :maybe

  def initialize(url)
    @url = transform_url(url)
    @uri = URI(@url)
    @body = nil
    @content = nil
  end

  # Response body, fetched on first use and memoized.
  def body = @body ||= fetch

  # The body parsed as JSON (memoized). Raises JSON::ParserError on invalid JSON.
  def json_body = @json_body ||= JSON.parse(body)

  # Hook for subclasses that need to rewrite the URL before it is fetched.
  def transform_url(url) = url

  # Downloads the body of `@uri`, reading it chunk by chunk so the download can
  # be aborted as soon as it would exceed MAX_SIZE.
  #
  # Returns the body as a String and stores it in `@body`.
  # Raises RuntimeError on a non-2xx response and FileTooLargeError when the
  # body is larger than MAX_SIZE.
  def fetch
    # Accumulate into a local so that a failed or oversized download never
    # leaves a partial body memoized in @body.
    buffer = String.new(capacity: MAX_SIZE)

    Net::HTTP.start(@uri.host, @uri.port, use_ssl: @uri.scheme == "https", read_timeout: TIMEOUT, open_timeout: TIMEOUT) do |http|
      # request_uri keeps the query string and turns an empty path into "/".
      http.request_get(@uri.request_uri) do |response|
        raise "Unexpected response: #{response.code}" unless response.is_a?(Net::HTTPSuccess)

        response.read_body do |chunk|
          if buffer.bytesize + chunk.bytesize > MAX_SIZE
            raise FileTooLargeError, "Response would exceed #{MAX_SIZE} bytes"
          end

          buffer << chunk
        end
      end
    end

    @body = buffer
  end

  # The default provider accepts any body; nothing is fetched or inspected here.
  def handles_body? = true

  # Override in subclasses - this comes from the `body`.
  # Splitting content into payload and metadata is the job of the SourceFile class.
  def content = body

  # Override in subclasses - this should return the filename extracted from the
  # body, if it exists, but not from the metadata.
  def filename = File.basename @url

  # Time the package was created at its source, or nil when unknown. The
  # generic provider has no such information; nil is dropped from the metadata
  # by the `compact` in `source_file`.
  def packaged_at = nil

  # Builds (and memoizes) the SourceFile for this provider's content, passing
  # along the filename, URL and, when known, the packaged_at time as metadata.
  def source_file
    @source_file ||= SourceFile.from_content(content, metadata: {"filename" => filename, "url" => url, "packaged_at" => packaged_at}.compact)
  end
end
# Provider for gists on gist.github.com. The gist page URL is rewritten to the
# api.github.com gists endpoint and the JSON response is used as the body.
class GithubGistProvider < DefaultProvider
  def self.handles_url?(url) = url.match?(%r{gist\.github\.com})

  # Content and filename are taken from the first entry of the "files" object.
  def content = json_body["files"].values.first["content"]
  def filename = json_body["files"].values.first["filename"]

  # Extracts the gist id from a `gist.github.com/<owner>/<id>` URL and returns
  # the matching API URL.
  def transform_url(url)
    gist_id = url[/gist\.github\.com\/[^\/]+\/([a-f0-9]+)/, 1]
    "https://api.github.com/gists/#{gist_id}"
  end

  # Creation time of the gist as a Time, or nil when "created_at" is missing
  # (Time.parse(nil) raises TypeError) or not parseable (ArgumentError).
  def packaged_at
    Time.parse(json_body["created_at"])
  rescue ArgumentError, TypeError
    nil
  end
end
# Provider that probes a URL by appending ".json" and reading the first entry
# of the "files" array in the JSON response.
class OpenGistProvider < DefaultProvider
  # Provider.for calls this on the class, so it must be a class method.
  def self.handles_url?(url) = :maybe

  def transform_url(url) = "#{url}.json"
  def content = json_body.dig("files", 0, "content")
  def filename = json_body.dig("files", 0, "filename")

  # If we successfully fetch the body, and the body contains content and a
  # filename, then we can handle the body.
  #
  # Any failure to fetch, parse or navigate the response means "not ours" and
  # yields false, so that Provider.for can move on to the next provider:
  # * JSON::ParserError - the response is not JSON;
  # * TypeError         - the JSON has a different shape than `dig` expects.
  def handles_body?
    # A JSON document that is valid but not an object (array, number, null)
    # cannot contain "files".
    return false unless json_body.is_a?(Hash)

    content && filename
  rescue FileTooLargeError, Net::HTTPError, JSON::ParserError, TypeError, RuntimeError
    false
  end
end
# Provider classes in the order Provider.for consults them. DefaultProvider
# answers :maybe for every URL and accepts every body, so it stays last as the
# catch-all.
PROVIDERS = [GithubGistProvider, OpenGistProvider, DefaultProvider].freeze
end