mirror of
https://github.com/nushell/nushell.git
synced 2025-06-30 06:30:08 +02:00
feat: add query webpage-info
to plugin_nu_query
(#13252)
# Description

This PR adds a new subcommand `query webpage-info` to `plugin_nu_query`. The subcommand is a basic wrapper for the [`webpage`](https://crates.io/crates/webpage) crate.

Usage:

```
http get https://phoronix.com | query webpage-info
```

It returns a `Record` version of [`webpage::HTML`](https://docs.rs/webpage/latest/webpage/struct.HTML.html).

The PR also takes a shot at bringing @lily-mara's [nu-serde::to_value](https://github.com/nushell/nushell/pull/3878/files) back to life, updating it for the latest version of nushell. That's not the main focus of the PR though - I just didn't want to have to implement a custom converter for `webpage::HTML` 😅. If it looks reasonable we could move it to `nu_protocol` (?), either in this PR or a future one (along with adding tests for it).

# User-Facing Changes

No breaking changes.
This commit is contained in:
@ -22,4 +22,7 @@ nu-protocol = { path = "../nu-protocol", version = "0.95.1" }
|
||||
gjson = "0.8"
|
||||
scraper = { default-features = false, version = "0.19" }
|
||||
sxd-document = "0.3"
|
||||
sxd-xpath = "0.4"
|
||||
sxd-xpath = "0.4"
|
||||
webpage = { version = "2.0.1", features = ["serde"] }
|
||||
serde_json.workspace = true
|
||||
serde.workspace = true
|
||||
|
@ -1,6 +1,7 @@
|
||||
mod query;
|
||||
mod query_json;
|
||||
mod query_web;
|
||||
mod query_webpage_info;
|
||||
mod query_xml;
|
||||
mod web_tables;
|
||||
|
||||
|
@ -1,4 +1,7 @@
|
||||
use crate::{query_json::QueryJson, query_web::QueryWeb, query_xml::QueryXml};
|
||||
use crate::{
|
||||
query_json::QueryJson, query_web::QueryWeb, query_webpage_info::QueryWebpageInfo,
|
||||
query_xml::QueryXml,
|
||||
};
|
||||
use nu_plugin::{EvaluatedCall, Plugin, PluginCommand, SimplePluginCommand};
|
||||
use nu_protocol::{Category, LabeledError, Signature, Value};
|
||||
|
||||
@ -26,6 +29,7 @@ impl Plugin for Query {
|
||||
Box::new(QueryJson),
|
||||
Box::new(QueryXml),
|
||||
Box::new(QueryWeb),
|
||||
Box::new(QueryWebpageInfo),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
478
crates/nu_plugin_query/src/query_webpage_info.rs
Normal file
478
crates/nu_plugin_query/src/query_webpage_info.rs
Normal file
@ -0,0 +1,478 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, SimplePluginCommand};
|
||||
use nu_protocol::{Category, Example, LabeledError, Record, Signature, Span, Type, Value};
|
||||
|
||||
use crate::Query;
|
||||
|
||||
/// Plugin command type backing the `query webpage-info` subcommand.
pub struct QueryWebpageInfo;
|
||||
|
||||
impl SimplePluginCommand for QueryWebpageInfo {
|
||||
type Plugin = Query;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"query webpage-info"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"uses the webpage crate to extract info from html: title, description, language, links, RSS feeds, Opengraph, Schema.org, and more"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(Type::String, Type::record())
|
||||
.category(Category::Network)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
web_examples()
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_plugin: &Query,
|
||||
_engine: &EngineInterface,
|
||||
_call: &EvaluatedCall,
|
||||
input: &Value,
|
||||
) -> Result<Value, LabeledError> {
|
||||
let span = input.span();
|
||||
match input {
|
||||
Value::String { val, .. } => execute_webpage(val, span),
|
||||
_ => Err(LabeledError::new("Requires text input")
|
||||
.with_label("expected text from pipeline", span)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn web_examples() -> Vec<Example<'static>> {
|
||||
vec![Example {
|
||||
example: "http get https://phoronix.com | query webpage-info",
|
||||
description: "extract detailed info from phoronix.com website",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn execute_webpage(html: &str, span: Span) -> Result<Value, LabeledError> {
|
||||
let info = webpage::HTML::from_string(html.to_string(), None)
|
||||
.map_err(|e| LabeledError::new(e.to_string()).with_label("error parsing html", span))?;
|
||||
|
||||
let value = to_value(info, span).map_err(|e| {
|
||||
LabeledError::new(e.to_string()).with_label("error convert Value::Record", span)
|
||||
})?;
|
||||
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal document that carries only a title.
    const HTML: &str = r#"
<html><head><meta><title>My Title</title></head></html>
"#;

    /// The record produced for `HTML` exposes the page title under `title`.
    #[test]
    fn test_basics() {
        let value = execute_webpage(HTML, Span::test_data()).unwrap();
        let fields = value.as_record().unwrap();
        let title = fields.get("title").unwrap().as_str().unwrap();
        assert_eq!(title, "My Title");
    }
}
|
||||
|
||||
// revive nu-serde sketch
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
/// Convert any serde:Serialize into a `nu_protocol::Value`
|
||||
pub fn to_value<T>(value: T, span: Span) -> Result<Value, Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
value.serialize(&ValueSerializer { span })
|
||||
}
|
||||
|
||||
/// Serializer state for converting `Serialize` values into `Value`s.
struct ValueSerializer {
|
||||
// Span attached to every `Value` this serializer creates.
span: Span,
|
||||
}
|
||||
|
||||
/// Collects key/value pairs into a `Record` for maps, structs and struct variants.
struct MapSerializer<'a> {
|
||||
// Entries gathered so far.
record: Record,
|
||||
// Parent serializer: serializes nested values and supplies the span.
serializer: &'a ValueSerializer,
|
||||
// Key stored by `serialize_key` and consumed by the following `serialize_value`.
current_key: Option<String>,
|
||||
}
|
||||
|
||||
impl<'a> serde::Serializer for &'a ValueSerializer {
|
||||
type Ok = Value;
|
||||
type Error = Error;
|
||||
|
||||
type SerializeSeq = SeqSerializer<'a>;
|
||||
type SerializeTuple = SeqSerializer<'a>;
|
||||
type SerializeTupleStruct = SeqSerializer<'a>;
|
||||
type SerializeTupleVariant = SeqSerializer<'a>;
|
||||
|
||||
type SerializeMap = MapSerializer<'a>;
|
||||
type SerializeStruct = MapSerializer<'a>;
|
||||
type SerializeStructVariant = MapSerializer<'a>;
|
||||
|
||||
fn serialize_bool(self, v: bool) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::bool(v, self.span))
|
||||
}
|
||||
|
||||
fn serialize_i8(self, v: i8) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::int(v.into(), self.span))
|
||||
}
|
||||
|
||||
fn serialize_i16(self, v: i16) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::int(v.into(), self.span))
|
||||
}
|
||||
|
||||
fn serialize_i32(self, v: i32) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::int(v.into(), self.span))
|
||||
}
|
||||
|
||||
fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::int(v, self.span))
|
||||
}
|
||||
|
||||
fn serialize_u8(self, v: u8) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::int(v.into(), self.span))
|
||||
}
|
||||
|
||||
fn serialize_u16(self, v: u16) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::int(v.into(), self.span))
|
||||
}
|
||||
|
||||
fn serialize_u32(self, v: u32) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::int(v.into(), self.span))
|
||||
}
|
||||
|
||||
fn serialize_u64(self, _v: u64) -> Result<Self::Ok, Self::Error> {
|
||||
// TODO: how to represent a u64 value a Value<i64>?
|
||||
Err(Error::new("the numbers are too big"))
|
||||
// Ok(Value::int(v.into(), self.span))
|
||||
}
|
||||
|
||||
fn serialize_f32(self, v: f32) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::float(v.into(), self.span))
|
||||
}
|
||||
|
||||
fn serialize_f64(self, v: f64) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::float(v, self.span))
|
||||
}
|
||||
|
||||
fn serialize_char(self, v: char) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::string(v, self.span))
|
||||
}
|
||||
|
||||
fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::string(v, self.span))
|
||||
}
|
||||
|
||||
fn serialize_bytes(self, v: &[u8]) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::binary(v, self.span))
|
||||
}
|
||||
|
||||
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::nothing(self.span))
|
||||
}
|
||||
|
||||
fn serialize_some<T: ?Sized>(self, value: &T) -> Result<Self::Ok, Self::Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
value.serialize(self)
|
||||
}
|
||||
|
||||
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
|
||||
// TODO: is this OK?
|
||||
Ok(Value::nothing(self.span))
|
||||
}
|
||||
|
||||
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
|
||||
// TODO: is this OK?
|
||||
Ok(Value::nothing(self.span))
|
||||
}
|
||||
|
||||
fn serialize_unit_variant(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
) -> Result<Self::Ok, Self::Error> {
|
||||
// TODO: is this OK?
|
||||
Ok(Value::nothing(self.span))
|
||||
}
|
||||
|
||||
fn serialize_newtype_struct<T: ?Sized>(
|
||||
self,
|
||||
_name: &'static str,
|
||||
value: &T,
|
||||
) -> Result<Self::Ok, Self::Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
value.serialize(self)
|
||||
}
|
||||
|
||||
fn serialize_newtype_variant<T: ?Sized>(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
value: &T,
|
||||
) -> Result<Self::Ok, Self::Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
value.serialize(self)
|
||||
}
|
||||
|
||||
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
|
||||
Ok(SeqSerializer::new(self))
|
||||
}
|
||||
|
||||
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
|
||||
Ok(SeqSerializer::new(self))
|
||||
}
|
||||
|
||||
fn serialize_tuple_struct(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_len: usize,
|
||||
) -> Result<Self::SerializeTupleStruct, Self::Error> {
|
||||
Ok(SeqSerializer::new(self))
|
||||
}
|
||||
|
||||
fn serialize_tuple_variant(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
_len: usize,
|
||||
) -> Result<Self::SerializeTupleVariant, Self::Error> {
|
||||
Ok(SeqSerializer::new(self))
|
||||
}
|
||||
|
||||
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
|
||||
Ok(MapSerializer::new(self))
|
||||
}
|
||||
|
||||
fn serialize_struct(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_len: usize,
|
||||
) -> Result<Self::SerializeStruct, Self::Error> {
|
||||
Ok(MapSerializer::new(self))
|
||||
}
|
||||
|
||||
fn serialize_struct_variant(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
_len: usize,
|
||||
) -> Result<Self::SerializeStructVariant, Self::Error> {
|
||||
Ok(MapSerializer::new(self))
|
||||
}
|
||||
}
|
||||
|
||||
/// Error produced while converting a `Serialize` value into a `Value`.
pub struct Error {
    // Human-readable description of the failure.
    message: String,
}

impl Error {
    /// Creates an error whose message is the `Display` rendering of `msg`.
    pub fn new<T: std::fmt::Display>(msg: T) -> Self {
        let message = msg.to_string();
        Self { message }
    }
}
|
||||
|
||||
impl serde::ser::Error for Error {
|
||||
fn custom<T: std::fmt::Display>(msg: T) -> Self {
|
||||
Error::new(msg.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Error {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.message)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.message)
|
||||
}
|
||||
}
|
||||
|
||||
// Empty impl: every `std::error::Error` method keeps its default.
impl std::error::Error for Error {}
|
||||
|
||||
//
|
||||
// maps
|
||||
impl<'a> MapSerializer<'a> {
|
||||
fn new(serializer: &'a ValueSerializer) -> Self {
|
||||
Self {
|
||||
record: Record::new(),
|
||||
current_key: None,
|
||||
serializer,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> serde::ser::SerializeStruct for MapSerializer<'a> {
|
||||
type Ok = Value;
|
||||
type Error = Error;
|
||||
|
||||
fn serialize_field<T: ?Sized>(
|
||||
&mut self,
|
||||
key: &'static str,
|
||||
value: &T,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
self.record
|
||||
.insert(key.to_owned(), value.serialize(self.serializer)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::record(self.record, self.serializer.span))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> serde::ser::SerializeMap for MapSerializer<'a> {
|
||||
type Ok = Value;
|
||||
type Error = Error;
|
||||
|
||||
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
let value = serde_json::to_value(key).map_err(Error::new)?;
|
||||
let key = value
|
||||
.as_str()
|
||||
.ok_or(Error::new("key must be a string"))?
|
||||
.to_string();
|
||||
self.current_key = Some(key);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
let key = self.current_key.take().ok_or(Error::new("key expected"))?;
|
||||
self.record.insert(key, value.serialize(self.serializer)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::record(self.record, self.serializer.span))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> serde::ser::SerializeStructVariant for MapSerializer<'a> {
|
||||
type Ok = Value;
|
||||
type Error = Error;
|
||||
|
||||
fn serialize_field<T: ?Sized>(
|
||||
&mut self,
|
||||
key: &'static str,
|
||||
value: &T,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
self.record
|
||||
.insert(key.to_owned(), value.serialize(self.serializer)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::record(self.record, self.serializer.span))
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// sequences
|
||||
struct SeqSerializer<'a> {
|
||||
seq: Vec<Value>,
|
||||
serializer: &'a ValueSerializer,
|
||||
}
|
||||
|
||||
impl<'a> SeqSerializer<'a> {
|
||||
fn new(serializer: &'a ValueSerializer) -> Self {
|
||||
Self {
|
||||
seq: Vec::new(),
|
||||
serializer,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> serde::ser::SerializeSeq for SeqSerializer<'a> {
|
||||
type Ok = Value;
|
||||
type Error = Error;
|
||||
|
||||
fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
self.seq.push(value.serialize(self.serializer)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::list(self.seq, self.serializer.span))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> serde::ser::SerializeTuple for SeqSerializer<'a> {
|
||||
type Ok = Value;
|
||||
type Error = Error;
|
||||
|
||||
fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
self.seq.push(value.serialize(self.serializer)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::list(self.seq, self.serializer.span))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> serde::ser::SerializeTupleStruct for SeqSerializer<'a> {
|
||||
type Ok = Value;
|
||||
type Error = Error;
|
||||
|
||||
fn serialize_field<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
self.seq.push(value.serialize(self.serializer)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::list(self.seq, self.serializer.span))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> serde::ser::SerializeTupleVariant for SeqSerializer<'a> {
|
||||
type Ok = Value;
|
||||
type Error = Error;
|
||||
|
||||
fn serialize_field<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
self.seq.push(value.serialize(self.serializer)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(Value::list(self.seq, self.serializer.span))
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user