How to Scrape a Steemit Post with JavaScript (Node.js)

This simple module will parse steemd for some essential data. The data includes images, body, author, URL of the original content, and more. It's very easy to extend as well. It uses cheerio, a simple jQuery-like way to load HTML text into a manipulable object. It also uses a package for getting data URIs if the imageUris option is passed. It was helpful to use Chrome's inspector tool: if you open it and right-click an element, you can copy its CSS selector and use it with cheerio. You'll notice I do some processing on the post body; this is because Markdown is returned for my posts and I just want the relevant text. Here's the script; let me know if you have any questions.

const util = require('util');
const path = require('path');
const request = require('request-promise');
const cheerio = require('cheerio');
const removeMd = require('remove-markdown');
const _ = require('lodash');

// Matches an http(s) URL ending in a common image extension (case-insensitive).
// The `.*` is greedy, so on a line with several URLs the match runs to the
// last image extension on that line.
const reImgHtml = /(https?:\/\/.*\.(?:png|jpg|svg|gif|bmp))/i;
// Matches every `&nbsp;` entity and every `<...>` tag.
const reHtml = /(&nbsp;|<([^>]+)>)/ig;
// Matches every http, https or ftp URL up to the next whitespace, `<` or `>`.
const reUrl = /(?:https?|ftp):\/\/[^\s<>]+/g;
// Lookup from account name to a display name.
// NOTE(review): the object literal was left unclosed in this copy of the
// script; any entries after the first were lost — confirm against the original.
const realnames = {
  donmesswithbeer: 'My Secret Real Name',
};

/**
 * Strip every URL matched by the module-level `reUrl` pattern from a string.
 *
 * @param {string} str - Text that may contain URLs.
 * @returns {string} `str` with each match replaced by the empty string;
 *   whitespace around a removed URL is left untouched.
 */
function removeUrls(str) {
  return str.replace(reUrl, '');
}

/**
 * Strip everything matched by the module-level `reHtml` pattern from a string.
 *
 * @param {string} str - Text that may contain HTML markup.
 * @returns {string} `str` with each match replaced by the empty string. Only
 *   the matched markup is removed; text between tags is kept.
 */
function removeHtml(str) {
  return str.replace(reHtml, '');
}

/**
 * Reduce a raw post body to plain text.
 *
 * The body is passed through three steps, in this order: `removeHtml`, then
 * `removeMd` (the `remove-markdown` package), then `removeUrls`.
 *
 * @param {string} body - Raw post body.
 * @returns {string} The body after all three steps have been applied.
 */
function parseText(body) {
  return removeUrls(removeMd(removeHtml(body)));
}

/**
 * Split a post body into sentence-like chunks.
 *
 * The body is first cleaned with `parseText`, then split on the literal
 * separator `'. '` (period followed by one space). The separator is consumed,
 * so every chunk except the last loses its trailing period.
 *
 * @param {string} body - Raw post body.
 * @returns {string[]} The chunks, in order; always at least one element.
 */
function getSentences(body) {
  return parseText(body).split('. ');
}

/**
 * Fetch a post page and extract its metadata and text.
 *
 * @param {string} postUrl - URL of the page to download.
 * @returns {Promise<{meta: *, body: string, author: string, url: (string|undefined),
 *   title: string, subtitle: string, summary: string, parsedBody: string}>}
 *   `meta` is the JSON parsed from the first table row's cell; `body` is the
 *   raw text of the post; `parsedBody` is `body` cleaned by `parseText`;
 *   `subtitle` is the first sentence and `summary` the first two sentences.
 * @throws {TypeError} If the metadata table cell (or its first child node) is
 *   not present in the downloaded page.
 * @throws {SyntaxError} If the metadata cell does not contain valid JSON.
 */
async function getPost(postUrl) {
  const html = await request(postUrl);
  const $ = cheerio.load(html);

  // The first cell of the first table row is expected to hold a JSON blob.
  const metaCell = $('table > tbody > tr:nth-child(1) > td')[0];
  const metaNode = metaCell && metaCell.children[0];
  if (!metaNode) {
    // Fail with a descriptive message instead of an opaque
    // "cannot read properties of undefined" when the layout is unexpected.
    throw new TypeError(`No metadata table cell found in page: ${postUrl}`);
  }
  const meta = JSON.parse(metaNode.data);

  // NOTE(review): the three selectors below start with a child combinator
  // (' > ...'); it looks like the leading part of each selector was lost in
  // this copy of the script — confirm against the real page markup.
  const body = $(' > div > pre').text();
  const sentences = getSentences(body);
  const author = $(' > div > a').text();
  const url = $(' > a').attr('href');
  const title = $('div h3:first-of-type').text();

  const subtitle = sentences[0];
  // getSentences consumes the '. ' separators, so re-join and re-terminate.
  const summary = `${sentences.slice(0, 2).join('. ')}.`;
  const parsedBody = parseText(body);

  return { meta, body, author, url, title, subtitle, summary, parsedBody };
}

module.exports = {

// example
// you MUST use steemd, not steemit
// NOTE(review): the URL argument is empty and the `.then` callback was cut
// off in this copy of the script; console.log is assumed from the trailing
// "outputs" comment — confirm. `.catch` is placed last so that a failed
// request is reported once and is not followed by logging `undefined`.
getPost('')
  .then(console.log) // outputs steemit blog data
  .catch(console.error);

