diff options
Diffstat (limited to 'src/github.rs')
-rw-r--r-- | src/github.rs | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/src/github.rs b/src/github.rs new file mode 100644 index 0000000..6e499be --- /dev/null +++ b/src/github.rs @@ -0,0 +1,243 @@ +use std::fmt::Debug; + +use anyhow::{bail, Result}; +use chrono::Duration; +use graphql_client::{reqwest::post_graphql_blocking as post_graphql, GraphQLQuery}; + +use crate::types::{DateTime, Issue, PullRequest, HTML, URI}; + +const API_URL: &str = "https://api.github.com/graphql"; + +type Cursor = String; + +pub struct Github { + client: reqwest::blocking::Client, + owner: String, + repo: String, + label: String, +} + +trait ChunkedQuery: GraphQLQuery { + type Item; + + fn change_after(&self, v: Self::Variables, after: Option<String>) -> Self::Variables; + fn set_batch(&self, batch: i64, v: Self::Variables) -> Self::Variables; + + fn process(&self, d: Self::ResponseData) -> Result<(Vec<Self::Item>, Option<Cursor>)>; +} + +#[derive(Debug, GraphQLQuery)] +#[graphql( + schema_path = "vendor/github.com/schema.docs.graphql", + query_path = "src/issues.graphql", + response_derives = "Debug", + variables_derives = "Clone,Debug" +)] +pub struct IssuesQuery; + +impl ChunkedQuery for IssuesQuery { + type Item = Issue; + + fn change_after(&self, v: Self::Variables, after: Option<String>) -> Self::Variables { + Self::Variables { after, ..v } + } + fn set_batch(&self, batch: i64, v: Self::Variables) -> Self::Variables { + Self::Variables { batch, ..v } + } + + fn process(&self, d: Self::ResponseData) -> Result<(Vec<Self::Item>, Option<Cursor>)> { + debug!("rate limits: {:?}", d.rate_limit); + let issues = match d.repository { + Some(r) => r.issues, + None => bail!("query returned no repo"), + }; + // deliberately ignore all nulls. no idea why the schema doesn't make + // all of these links mandatory, having them nullable makes no sense. + let infos = issues + .edges + .unwrap_or_default() + .into_iter() + .filter_map(|e| e?.node) + .map(|n| Issue { + id: n.id, + title: n.title, + is_open: !n.closed, + body: n.body_html, + last_update: n.updated_at, + url: n.url, + }) + .collect(); + let cursor = if issues.page_info.has_next_page { + issues.page_info.end_cursor + } else { + None + }; + Ok((infos, cursor)) + } +} + +#[derive(Debug, GraphQLQuery)] +#[graphql( + schema_path = "vendor/github.com/schema.docs.graphql", + query_path = "src/pulls.graphql", + response_derives = "Debug", + variables_derives = "Clone,Debug" +)] +pub struct PullsQuery { + since: Option<DateTime>, +} + +impl ChunkedQuery for PullsQuery { + type Item = PullRequest; + + fn change_after(&self, v: Self::Variables, after: Option<String>) -> Self::Variables { + Self::Variables { after, ..v } + } + fn set_batch(&self, batch: i64, v: Self::Variables) -> Self::Variables { + Self::Variables { batch, ..v } + } + + fn process(&self, d: Self::ResponseData) -> Result<(Vec<Self::Item>, Option<Cursor>)> { + debug!("rate limits: {:?}", d.rate_limit); + let prs = match d.repository { + Some(r) => r.pull_requests, + None => bail!("query returned no repo"), + }; + // deliberately ignore all nulls. no idea why the schema doesn't make + // all of these links mandatory, having them nullable makes no sense. + let infos: Vec<PullRequest> = prs + .edges + .unwrap_or_default() + .into_iter() + .filter_map(|e| e?.node) + .map(|n| PullRequest { + id: n.id, + title: n.title, + is_open: !n.closed, + is_merged: n.merged, + body: n.body_html, + last_update: n.updated_at, + url: n.url, + base_ref: n.base_ref_name, + }) + .collect(); + let cursor = match (self.since, infos.last()) { + (Some(since), Some(last)) if last.last_update < since => None, + _ => { + if prs.page_info.has_next_page { + prs.page_info.end_cursor + } else { + None + } + } + }; + Ok((infos, cursor)) + } +} + +impl Github { + pub fn new(api_token: &str, owner: &str, repo: &str, label: &str) -> Result<Self> { + use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION}; + + let headers = match HeaderValue::from_str(&format!("Bearer {}", api_token)) { + Ok(h) => [(AUTHORIZATION, h)].into_iter().collect::<HeaderMap>(), + Err(e) => bail!("invalid API token: {}", e), + }; + let client = reqwest::blocking::Client::builder() + .user_agent(format!( + "{}/{}", + env!("CARGO_PKG_NAME"), + env!("CARGO_PKG_VERSION") + )) + .default_headers(headers) + .build()?; + Ok(Github { + client, + owner: owner.to_string(), + repo: repo.to_string(), + label: label.to_string(), + }) + } + + fn query_raw<Q>(&self, q: Q, mut vars: <Q as GraphQLQuery>::Variables) -> Result<Vec<Q::Item>> + where + Q: ChunkedQuery + Debug, + Q::Variables: Clone + Debug, + { + let mut result = vec![]; + let max_batch = 100; + let mut batch = max_batch; + + loop { + vars = q.set_batch(batch, vars); + + debug!("running query {:?} with {:?}", q, vars); + let started = chrono::Local::now(); + let resp = post_graphql::<Q, _>(&self.client, API_URL, vars.clone())?; + let ended = chrono::Local::now(); + + // queries may time out. if that happens throttle the query once and try + // again, if that fails too we fail for good. + let resp = match resp.errors { + None => { + // time limit is 10 seconds. if we're well under that, increase + // the batch size again. + if batch != max_batch && ended - started < Duration::seconds(8) { + batch = (batch + batch / 10 + 1).min(max_batch); + info!("increasing batch size to {}", batch); + } + resp + } + Some(e) if batch > 1 && e.iter().all(|e| e.message.contains("timeout")) => { + warn!("throttling query due to timeout error: {:?}", e); + // anything larger than 1 seems to be unreliable here + batch = 1; + info!("new batch size: {}", batch); + continue; + } + Some(e) => bail!("query failed: {:?}", e), + }; + + match resp.data { + Some(d) => { + let (mut items, cursor) = q.process(d)?; + result.append(&mut items); + match cursor { + None => break, + cursor => vars = q.change_after(vars, cursor), + } + } + None => bail!("query returned no data"), + } + } + + Ok(result) + } + + pub fn query_issues(&self, since: Option<DateTime>) -> Result<Vec<Issue>> { + self.query_raw( + IssuesQuery, + issues_query::Variables { + owner: self.owner.clone(), + name: self.repo.clone(), + label: self.label.clone(), + after: None, + since, + batch: 100, + }, + ) + } + + pub fn query_pulls(&self, since: Option<DateTime>) -> Result<Vec<PullRequest>> { + self.query_raw( + PullsQuery { since }, + pulls_query::Variables { + owner: self.owner.clone(), + name: self.repo.clone(), + label: self.label.clone(), + after: None, + batch: 100, + }, + ) + } +} |