Source: lib/category-loader.js

import urlAssembler from 'url-assembler'
import got from 'got'
import CategoryItem from './category-item.js'
import buildSourceUrl from './build-source-url.js'

/**
 * Loads category members recursively from the given MediaWiki-Source
 *  Category items cannot always be retrieved all at once (500 at most)
 *  so we have to make multiple requests to the server
 * @private
 * @param {string} source
 * @param {string} categoryTitle
 * @param {CategoryItem[]} previousItems
 * @param {string} continueTerm
 * @return {Promise<CategoryItem[]>}
 */
async function getCategoryItems(source, categoryTitle, previousItems, continueTerm) {
  // # parameters which will be present in the url (query)
  const urlParams = {
    action: 'query',
    format: 'json',
    list: 'categorymembers',
    cmtitle: categoryTitle,
    cmlimit: 500 // maximum as of 2022
  }
  // ## continue term is being used by mediawiki to identify
  //     the current page of the category we want to retrieve
  if (continueTerm && continueTerm !== '') {
    urlParams.cmcontinue = continueTerm
  }

  const url = urlAssembler(source)
    .param(urlParams)
    .toString()

  const response = await got(url, {
    retry: 1
  })

  // # traverse wikitionary data down to the content
  // the following might throw a syntax error if response.body
  // is no valid json
  let body
  try {
    body = JSON.parse(response.body)
  } catch (e) {
    return Promise.reject(new Error('response-body-invalid'))
  }

  // check if json format is ok
  if (body.query === undefined || body.query.categorymembers === undefined) {
    throw new Error('response-body-invalid')
  }

  // map to CategoryItem
  const categories = body.query.categorymembers.map(
    (item) => new CategoryItem(item.pageid, item.title)
  )

  // Do we have reached the last category page?
  //  if so then return all items
  //  else fetch more (recursive call)
  if (body.continue === undefined) {
    return Promise.resolve(previousItems.concat(categories))
  } else {
    return getCategoryItems(
      source,
      categoryTitle,
      previousItems.concat(categories),
      body.continue.cmcontinue
    )
  }
}

/**
 * Encapsulates utility methods for a given mediawiki-compatible source
 */
export default class CategoryLoader {
  /**
   * Initalizes CategoryLoader class
   * @deprecated [please use static create methods to create an object]
   * @param {string} sourceUrl
   */
  constructor(sourceUrl) {
    this.sourceUrl = sourceUrl
  }

  /**
   * Creates a CategoryLoader object from an complete url to the api
   * @param {string} sourceUrl
   * @example
   * CategoryLoader.createFromUrl('https://en.wikipedia.org/w/api.php')
   * @return {CategoryLoader}
   */
  static createFromUrl(sourceUrl) {
    return new CategoryLoader(sourceUrl)
  }

  /**
   * Creates a CategoryLoader object from an language-independent
   * url-template (mustache style).
   * Common templates are available in exported member MwSources
   * @param {string} urlTemplate mustache syntax
   * @param {string} languageCode mediawiki compatible language code
   * @example
   * CategoryLoader.createFromTemplate(
   *   'https://{{language}}.wikipedia.org/w/api.php',
   *   'en'
   * )
   * @return {CategoryLoader}
   */
  static createFromTemplate(urlTemplate, languageCode) {
    return CategoryLoader.createFromUrl(
      buildSourceUrl(urlTemplate, languageCode)
    )
  }

  /**
   * Retrieves all category members (but not from their subcategories)
   * @param {string} categoryTitle
   * @return {Promise} Array of type CategoryItem
   */
  loadMembers(categoryTitle) {
    return getCategoryItems(this.sourceUrl, categoryTitle, [], '')
  }
}