// Partial port of Python's textwrap module, version 3.9.0 (wrap, fill and dedent only):
// https://github.com/python/cpython/blob/v3.9.0b4/Lib/textwrap.py
|
|
|
|
'use strict'
|
|
|
|
/*
 * Text wrapping and filling.
 */

// Copyright (C) 1999-2001 Gregory P. Ward.
// Copyright (C) 2002, 2003 Python Software Foundation.
// Copyright (C) 2020 argparse.js authors
// Originally written by Greg Ward <gward@python.net>
|
|
|
|
// The set of recognized whitespace characters is hardcoded to the
// US-ASCII ones (tab, newline, vertical tab, form feed, carriage
// return, space), because some Unicode spaces (like \u00a0) are
// non-breaking and must not be treated as split points.
//
// The regex simply splits on runs of those characters; the capture
// group keeps each whitespace run as its own array element. E.g.
//   "Hello there -- you goof-ball, use the -b option!"
// splits into
//   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
const wordsep_simple_re = /([\t\n\x0b\x0c\r ]+)/
|
|
|
|
class TextWrapper {
    /*
     * Object for wrapping/filling text.  The public interface consists of
     * the wrap() and fill() methods; the other methods are just there for
     * subclasses to override in order to tweak the default behaviour.
     * If you want to completely replace the main wrapping algorithm,
     * you'll probably have to override _wrap_chunks().
     *
     * Several instance attributes control various aspects of wrapping:
     *   width (default: 70)
     *     the maximum width of wrapped lines (unless break_long_words
     *     is false)
     *   initial_indent (default: "")
     *     string that will be prepended to the first line of wrapped
     *     output.  Counts towards the line's width.
     *   subsequent_indent (default: "")
     *     string that will be prepended to all lines save the first
     *     of wrapped output; also counts towards each line's width.
     *   expand_tabs (default: true)
     *     Expand tabs in input text to spaces before further processing.
     *     Each tab will become 0 .. 'tabsize' spaces, depending on its
     *     position in its line.  If false, each tab is treated as a
     *     single character.
     *   tabsize (default: 8)
     *     Expand tabs in input text to 0 .. 'tabsize' spaces, unless
     *     'expand_tabs' is false.
     *   replace_whitespace (default: true)
     *     Replace all whitespace characters in the input text by spaces
     *     after tab expansion.  Note that if expand_tabs is false and
     *     replace_whitespace is true, every tab will be converted to a
     *     single space!
     *   fix_sentence_endings (default: false)
     *     Stored on the instance for compatibility with the Python API,
     *     but NOT implemented by this port (wrap() ignores it).
     *   break_long_words (default: true)
     *     Break words longer than 'width'.  If false, those words will not
     *     be broken, and some lines might be longer than 'width'.
     *   break_on_hyphens (default: true)
     *     Stored on the instance for compatibility with the Python API,
     *     but NOT implemented by this port: _split() only ever splits on
     *     whitespace.
     *   drop_whitespace (default: true)
     *     Drop leading and trailing whitespace from lines.
     *   max_lines (default: undefined)
     *     Truncate wrapped lines.  undefined or null means "no limit".
     *   placeholder (default: ' [...]')
     *     Append to the last line of truncated text.
     */

    constructor(options = {}) {
        const {
            width = 70,
            initial_indent = '',
            subsequent_indent = '',
            expand_tabs = true,
            replace_whitespace = true,
            fix_sentence_endings = false,
            break_long_words = true,
            drop_whitespace = true,
            break_on_hyphens = true,
            tabsize = 8,
            max_lines = undefined,
            placeholder = ' [...]'
        } = options

        this.width = width
        this.initial_indent = initial_indent
        this.subsequent_indent = subsequent_indent
        this.expand_tabs = expand_tabs
        this.replace_whitespace = replace_whitespace
        this.fix_sentence_endings = fix_sentence_endings
        this.break_long_words = break_long_words
        this.drop_whitespace = drop_whitespace
        this.break_on_hyphens = break_on_hyphens
        this.tabsize = tabsize
        this.max_lines = max_lines
        this.placeholder = placeholder
    }


    // -- Private methods -----------------------------------------------
    // (possibly useful for subclasses to override)

    _munge_whitespace(text) {
        /*
         * _munge_whitespace(text : string) -> string
         *
         * Munge whitespace in text: expand tabs and convert all other
         * whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
         * becomes " foo    bar  baz".
         */
        if (this.expand_tabs && text.includes('\t')) {
            // Equivalent of Python's str.expandtabs(): each tab pads up to
            // the next multiple of tabsize, the column restarts after a
            // line break, and a non-positive tabsize simply removes tabs.
            let expanded = ''
            let column = 0
            for (const ch of text) {
                if (ch === '\t') {
                    if (this.tabsize > 0) {
                        const pad = this.tabsize - (column % this.tabsize)
                        expanded += ' '.repeat(pad)
                        column += pad
                    }
                } else {
                    expanded += ch
                    column = (ch === '\n' || ch === '\r') ? 0 : column + 1
                }
            }
            text = expanded
        }
        if (this.replace_whitespace) {
            text = text.replace(/[\t\n\x0b\x0c\r]/g, ' ')
        }
        return text
    }

    _split(text) {
        /*
         * _split(text : string) -> [string]
         *
         * Split the text to wrap into indivisible chunks.  Chunks are
         * not quite the same as words; see _wrap_chunks() for full
         * details.  This port splits on whitespace only (hyphenated
         * words are never broken up), so the text
         *   Look, goof-ball -- use the -b option!
         * breaks into the following chunks:
         *   'Look,', ' ', 'goof-ball', ' ', '--', ' ',
         *   'use', ' ', 'the', ' ', '-b', ' ', 'option!'
         */
        // split() with a capturing group yields empty strings when the
        // text starts or ends with whitespace; drop them.
        return text.split(wordsep_simple_re).filter(Boolean)
    }

    _handle_long_word(reversed_chunks, cur_line, cur_len, width) {
        /*
         * _handle_long_word(chunks : [string],
         *                   cur_line : [string],
         *                   cur_len : int, width : int)
         *
         * Handle a chunk of text (most likely a word, not whitespace) that
         * is too long to fit in any line.  Mutates both 'reversed_chunks'
         * (a stack whose top is its last element) and 'cur_line'.
         */
        // Figure out when indent is larger than the specified width, and make
        // sure at least one character is stripped off on every pass
        const space_left = width < 1 ? 1 : width - cur_len
        const top = reversed_chunks.length - 1

        if (this.break_long_words) {
            // If we're allowed to break long words, then do so: put as much
            // of the next chunk onto the current line as will fit.
            cur_line.push(reversed_chunks[top].slice(0, space_left))
            reversed_chunks[top] = reversed_chunks[top].slice(space_left)
        } else if (cur_line.length === 0) {
            // Otherwise, we have to preserve the long word intact.  Only add
            // it to the current line if there's nothing already there --
            // that minimizes how much we violate the width constraint.
            cur_line.push(reversed_chunks.pop())
        }

        // If we're not allowed to break long words, and there's already
        // text on the current line, do nothing.  Next time through the
        // main loop of _wrap_chunks(), we'll wind up here again, but
        // cur_len will be zero, so the next line will be entirely
        // devoted to the long word that we can't handle right now.
    }

    _wrap_chunks(chunks) {
        /*
         * _wrap_chunks(chunks : [string]) -> [string]
         *
         * Wrap a sequence of text chunks and return a list of lines of
         * length 'this.width' or less.  (If 'break_long_words' is false,
         * some lines may be longer than this.)  Chunks correspond roughly
         * to words and the whitespace between them: each chunk is
         * indivisible (modulo 'break_long_words'), but a line break can
         * come between any two chunks.  Chunks should not have internal
         * whitespace; ie. a chunk is either all whitespace or a "word".
         * Whitespace chunks will be removed from the beginning and end of
         * lines, but apart from that whitespace is preserved.
         *
         * Throws an Error if 'width' is not positive, or if 'max_lines' is
         * set and the placeholder cannot fit on the last permitted line.
         * The 'chunks' array passed in is left untouched.
         */
        const lines = []
        if (this.width <= 0) {
            throw new Error(`invalid width ${this.width} (must be > 0)`)
        }

        const has_limit = this.max_lines !== undefined && this.max_lines !== null
        if (has_limit) {
            // Indent of the line the placeholder would end up on.
            const last_indent = this.max_lines > 1
                ? this.subsequent_indent
                : this.initial_indent
            if (last_indent.length + this.placeholder.trimStart().length > this.width) {
                throw new Error('placeholder too large for max width')
            }
        }

        // Arrange in reverse order so items can be efficiently popped
        // from a stack of chunks.  Work on a copy so the caller's array
        // is not reversed and emptied behind its back.
        const stack = chunks.slice().reverse()

        while (stack.length > 0) {
            // Start the list of chunks that will make up the current line.
            // cur_len is just the length of all the chunks in cur_line.
            const cur_line = []
            let cur_len = 0

            // Figure out which static string will prefix this line.
            const indent = lines.length > 0
                ? this.subsequent_indent
                : this.initial_indent

            // Maximum width for this line.
            const width = this.width - indent.length

            // First chunk on line is whitespace -- drop it, unless this
            // is the very beginning of the text (ie. no lines started yet).
            if (this.drop_whitespace && lines.length > 0 &&
                    stack[stack.length - 1].trim() === '') {
                stack.pop()
            }

            // Squeeze as many chunks as will fit onto the current line.
            while (stack.length > 0) {
                const l = stack[stack.length - 1].length
                if (cur_len + l > width) {
                    break
                }
                cur_line.push(stack.pop())
                cur_len += l
            }

            // The current line is full, and the next chunk is too big to
            // fit on *any* line (not just this one).
            if (stack.length > 0 && stack[stack.length - 1].length > width) {
                this._handle_long_word(stack, cur_line, cur_len, width)
                cur_len = cur_line.reduce((sum, chunk) => sum + chunk.length, 0)
            }

            // If the last chunk on this line is all whitespace, drop it.
            if (this.drop_whitespace && cur_line.length > 0 &&
                    cur_line[cur_line.length - 1].trim() === '') {
                cur_len -= cur_line[cur_line.length - 1].length
                cur_line.pop()
            }

            if (cur_line.length === 0) {
                continue
            }

            // The line can be emitted as-is when there is no line limit,
            // when it is not the last permitted line, or when it is the
            // last one but all remaining text (ignoring one droppable
            // whitespace chunk) has already been consumed.
            const only_whitespace_left = this.drop_whitespace &&
                stack.length === 1 && !stack[0].trim()
            const emit_as_is = !has_limit ||
                lines.length + 1 < this.max_lines ||
                ((stack.length === 0 || only_whitespace_left) && cur_len <= width)

            if (emit_as_is) {
                // Convert current line back to a string and store it in
                // list of all lines (return value).
                lines.push(indent + cur_line.join(''))
                continue
            }

            // Truncation: strip chunks off the end of the line until the
            // placeholder fits right after a non-whitespace chunk.
            let placed = false
            while (cur_line.length > 0) {
                const last = cur_line[cur_line.length - 1]
                if (last.trim() && cur_len + this.placeholder.length <= width) {
                    cur_line.push(this.placeholder)
                    lines.push(indent + cur_line.join(''))
                    placed = true
                    break
                }
                cur_len -= last.length
                cur_line.pop()
            }

            if (!placed) {
                // Nothing on this line could host the placeholder: try to
                // append it to the previous line, else emit it on its own.
                let appended = false
                if (lines.length > 0) {
                    const prev_line = lines[lines.length - 1].trimEnd()
                    if (prev_line.length + this.placeholder.length <= this.width) {
                        lines[lines.length - 1] = prev_line + this.placeholder
                        appended = true
                    }
                }
                if (!appended) {
                    lines.push(indent + this.placeholder.trimStart())
                }
            }
            break
        }

        return lines
    }

    _split_chunks(text) {
        /*
         * _split_chunks(text : string) -> [string]
         *
         * Normalize whitespace with _munge_whitespace() and then split the
         * result into chunks with _split().
         */
        text = this._munge_whitespace(text)
        return this._split(text)
    }

    // -- Public interface ----------------------------------------------

    wrap(text) {
        /*
         * wrap(text : string) -> [string]
         *
         * Reformat the single paragraph in 'text' so it fits in lines of
         * no more than 'this.width' columns, and return a list of wrapped
         * lines.  Tabs in 'text' are expanded (see 'expand_tabs'), and all
         * other whitespace characters (including newline) are converted
         * to space.
         */
        const chunks = this._split_chunks(text)
        // NOTE: fix_sentence_endings is not implemented in this port, so
        // the chunks are wrapped without any sentence-ending adjustment.
        return this._wrap_chunks(chunks)
    }

    fill(text) {
        /*
         * fill(text : string) -> string
         *
         * Reformat the single paragraph in 'text' to fit in lines of no
         * more than 'this.width' columns, and return a new string
         * containing the entire wrapped paragraph.
         */
        return this.wrap(text).join('\n')
    }
}
|
|
|
|
|
|
// -- Convenience interface ---------------------------------------------
|
|
|
|
function wrap(text, options = {}) {
    /*
     * Wrap a single paragraph of text and return the wrapped lines as an
     * array of strings.
     *
     * 'options' is forwarded to a new TextWrapper; 'width' falls back to
     * 70 when it is absent or undefined.  The actual work is done by
     * TextWrapper's wrap() method, so see the TextWrapper class for the
     * available options and for how whitespace is handled.
     */
    const { width = 70, ...rest } = options
    const settings = Object.assign({ width }, rest)
    const wrapper = new TextWrapper(settings)
    return wrapper.wrap(text)
}
|
|
|
|
function fill(text, options = {}) {
    /*
     * Fill a single paragraph of text and return it as one string.
     *
     * 'options' is forwarded to a new TextWrapper; 'width' falls back to
     * 70 when it is absent or undefined.  The actual work is done by
     * TextWrapper's fill() method, so see the TextWrapper class for the
     * available options and for how whitespace is handled.
     */
    const { width = 70, ...rest } = options
    const settings = Object.assign({ width }, rest)
    const wrapper = new TextWrapper(settings)
    return wrapper.fill(text)
}
|
|
|
|
// -- Loosely related functionality -------------------------------------
|
|
|
|
// Matches a line consisting solely of spaces and/or tabs.
const _whitespace_only_re = /^[ \t]+$/mg
// Matches the leading spaces/tabs of a line plus its first character that
// is not a space, tab or newline; the indentation itself is group 1.
const _leading_whitespace_re = /(^[ \t]*)(?:[^ \t\n])/mg
|
|
|
|
function dedent(text) {
    /*
     * Strip the leading whitespace that every line of `text` has in common.
     *
     * Handy for multi-line string literals that are indented in the source
     * code but should start at the left edge when displayed.
     *
     * Tabs and spaces both count as whitespace but are never considered
     * equal to each other, so a line indented with spaces and a line
     * indented with a tab share no common margin.
     *
     * Lines made up only of spaces and tabs are emptied first and do not
     * take part in computing the margin.
     */
    const cleaned = text.replace(_whitespace_only_re, '')
    const matches = cleaned.match(_leading_whitespace_re) || []

    // Longest run of leading spaces/tabs shared by all lines seen so far.
    let common = undefined

    for (const match of matches) {
        // Each match is the indentation followed by one extra character.
        const lead = match.slice(0, -1)

        if (common === undefined) {
            common = lead
            continue
        }

        // Deeper than (or equal to) the current margin: margin stands.
        if (lead.startsWith(common)) {
            continue
        }

        // Shallower but consistent with the current margin: it takes over.
        if (common.startsWith(lead)) {
            common = lead
            continue
        }

        // Neither is a prefix of the other, so they diverge somewhere
        // inside both strings: keep only the part before that point.
        const bound = Math.min(common.length, lead.length)
        let same = 0
        while (same < bound && common[same] === lead[same]) {
            same += 1
        }
        common = common.slice(0, same)
    }

    if (!common) {
        return cleaned
    }
    return cleaned.replace(new RegExp('^' + common, 'mg'), '')
}
|
|
|
|
module.exports = { wrap, fill, dedent }
|