diff --git a/.eslintrc.json b/.eslintrc.json new file mode 100644 index 0000000..1c6afb9 --- /dev/null +++ b/.eslintrc.json @@ -0,0 +1,26 @@ +{ + "parserOptions": { + "ecmaVersion": 6, + "sourceType": "module" + }, + "extends": "eslint:recommended", + "rules": { + "strict": [2, "global"], + "block-scoped-var": 2, + "consistent-return": 2, + "eqeqeq": [2, "smart"], + "guard-for-in": 2, + "no-caller": 2, + "no-extend-native": 2, + "no-loop-func": 2, + "no-new": 2, + "no-param-reassign": 2, + "no-return-assign": 2, + "no-unused-expressions": 2, + "no-use-before-define": 2, + "radix": [2, "always"], + "indent": [2, 2], + "quotes": [2, "double"], + "semi": [2, "always"] + } +} diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..4435abb --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,12 @@ +**Description of the change** + +Clearly and concisely describe the purpose of the pull request. If this PR relates to an existing issue or change proposal, please link to it. Include any other background context that would help reviewers understand the motivation for this PR. + +--- + +**Checklist:** + +- [ ] Added the change to the changelog's "Unreleased" section with a reference to this PR (e.g. 
"- Made a change (#0000)") +- [ ] Linked any existing issues or proposals that this pull request should close +- [ ] Updated or added relevant documentation +- [ ] Added a test for the contribution (if applicable) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..c69237a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,35 @@ +name: CI + +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: purescript-contrib/setup-purescript@main + with: + purescript: "unstable" + + - uses: actions/setup-node@v2 + with: + node-version: "14.x" + + - name: Install dependencies + run: | + npm install -g bower + npm install + bower install --production + + - name: Build source + run: npm run-script build + + - name: Run tests + run: | + bower install + npm run-script test --if-present diff --git a/.gitignore b/.gitignore index dc070b8..b846b63 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ /.* !/.gitignore +!/.eslintrc.json +!/.github/ /bower_components/ /node_modules/ /output/ -/tmp/ +package-lock.json diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..f0f30c5 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,252 @@ +# Changelog + +Notable changes to this project are documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [Unreleased] + +Breaking changes: + +New features: +- Added `startsWith` and `endsWith` (#147) + +Bugfixes: + +Other improvements: +- Redefine `Data.String.NonEmpty.CodeUnits.fromFoldable1` in terms of `singleton` (#168 by @postsolar) + +## [v6.0.1](https://github.com/purescript/purescript-strings/releases/tag/v6.0.1) - 2022-08-16 + +Bugfixes: +- Fix `Char`'s `toEnum` implementation (#163 by @JordanMartinez) + +## [v6.0.0](https://github.com/purescript/purescript-strings/releases/tag/v6.0.0) - 2022-04-27 + +Breaking changes: +- Migrate FFI to ES modules (#158 by @kl0tl and @JordanMartinez) +- Replaced polymorphic proxies with monomorphic `Proxy` (#158 by @JordanMartinez) +- In `slice`, drop bounds checking and `Maybe` return type (#145 by Quelklef) + +New features: + +Bugfixes: + +Other improvements: +- Surround code with backticks in documentation (#148) +- Make `RegexFlags` a `newtype` and add a `Newtype` instance for it (#159 by @mhmdanas) + +## [v5.0.0](https://github.com/purescript/purescript-strings/releases/tag/v5.0.0) - 2021-02-26 + +Breaking changes: +- Added support for PureScript 0.14 and dropped support for all previous versions (#129) +- Updated `replace'` to reflect the existence of optional capturing groups (#126) + +New features: +- Replaced `unsafeCoerce` with `coerce` where appropriate (#130) +- Replaced monomorphic proxies with `Type.Proxy.Proxy` and polymorphic variables (#134) +- Added a dotAll regexp flag (#133) + +Bugfixes: +- Removed the bounds check from the foreign implementation of `lastIndexOf'` (#137) + +Other improvements: +- Fix line endings to match overall project style (#132) +- Removed references to `codePointToInt`, which no longer exists (#135) +- Migrated CI to GitHub Actions and updated installation instructions to use Spago (#136) +- Added a changelog and pull request template (#140, #141) + +## [v4.0.2](https://github.com/purescript/purescript-strings/releases/tag/v4.0.2) - 2020-05-13 + +- Improved performance for 
`stripPrefix` / `stripSuffix` (#123, @michaelficarra) + +## [v4.0.1](https://github.com/purescript/purescript-strings/releases/tag/v4.0.1) - 2018-11-11 + +- Fixed out of bounds access in `unsafeCodePointAt0Fallback` (@zyla) +- Fixed `slice` when end index equals string length (@abaco) + +## [v4.0.0](https://github.com/purescript/purescript-strings/releases/tag/v4.0.0) - 2018-05-23 + +- Updated for PureScript 0.12 +- `splitAt` now always returns a value (#78, @MonoidMusician) +- Added `slice` (@themattchan) +- Added more `String` `Gen`s to correspond with `Char` `Gen`s (@matthewleon) +- `Regex` `match` now returns `NonEmptyArray` +- All string functions now operate on code points rather than code units. The old functions are available via the `.CodeUnits` modules +- `fromCharCode` can return `Nothing` now if given a value out of range + +## [v3.5.0](https://github.com/purescript/purescript-strings/releases/tag/v3.5.0) - 2018-02-12 + +- Added `Data.String.NonEmpty` + +## [v3.4.0](https://github.com/purescript/purescript-strings/releases/tag/v3.4.0) - 2017-12-28 + +- Add `Show CodePoint` instance (@csicar) +- Add `codePointFromChar` (@csicar) +- Expanded docs for most functions in `Data.String` and `Data.String.CodePoints` (@csicar) + +## [v3.3.2](https://github.com/purescript/purescript-strings/releases/tag/v3.3.2) - 2017-11-19 + +- Performance improvement in `Data.String.Regex.match` (@fehrenbach) + +## [v3.3.1](https://github.com/purescript/purescript-strings/releases/tag/v3.3.1) - 2017-08-06 + +- Fix some `Show` instances (@Rufflewind) + +## [v3.3.0](https://github.com/purescript/purescript-strings/releases/tag/v3.3.0) - 2017-07-10 + +- Add a new module `Data.String.CodePoints`, which treats strings as sequences of Unicode code points rather than sequences of UTF-16 code units. In the future we may swap this module with `Data.String`. 
(@michaelficarra) +- Fix a typo in the documentation (@ijks) + +## [v3.2.1](https://github.com/purescript/purescript-strings/releases/tag/v3.2.1) - 2017-06-06 + +- Ensure `genString` behaves the same regardless of the `MonadGen` implementation of `chooseInt` when `max < min` + +## [v3.2.0](https://github.com/purescript/purescript-strings/releases/tag/v3.2.0) - 2017-06-05 + +- Generated strings from `genString` now vary in length +- Added additional `Char` generators + +## [v3.1.0](https://github.com/purescript/purescript-strings/releases/tag/v3.1.0) - 2017-04-28 + +- Added some generator functions - introduced `Data.String.Gen` and `Data.Char.Gen` + +## [v3.0.0](https://github.com/purescript/purescript-strings/releases/tag/v3.0.0) - 2017-03-26 + +- Updated for PureScript 0.11 + +## [v2.1.0](https://github.com/purescript/purescript-strings/releases/tag/v2.1.0) - 2016-12-25 + +- Added `unsafeRegex` (@rightfold) + +## [v2.0.2](https://github.com/purescript/purescript-strings/releases/tag/v2.0.2) - 2016-10-26 + +- Documentation fix for `split` #70 (@leighman) + +## [v2.0.1](https://github.com/purescript/purescript-strings/releases/tag/v2.0.1) - 2016-10-08 + +- Improved `null` check implementation (@Risto-Stevcev) + +## [v2.0.0](https://github.com/purescript/purescript-strings/releases/tag/v2.0.0) - 2016-10-08 + +- Updated dependencies +- `Pattern` and `Replacement` newtypes are now used to distinguish between arguments when a function accepts multiple strings +- `RegexFlags` have been reworked as a monoid (@Risto-Stevcev) + +## [v1.1.0](https://github.com/purescript/purescript-strings/releases/tag/v1.1.0) - 2016-07-20 + +- Restored export of the `count` function. + +## [v1.0.0](https://github.com/purescript/purescript-strings/releases/tag/v1.0.0) - 2016-06-01 + +This release is intended for the PureScript 0.9.1 compiler and newer. 
+ +**Note**: The v1.0.0 tag is not meant to indicate the library is “finished”, the core libraries are all being bumped to this for the 0.9 compiler release so as to use semver more correctly. + +## [v0.7.1](https://github.com/purescript/purescript-strings/releases/tag/v0.7.1) - 2015-11-20 + +- Removed unused imports (@tfausak) + +## [v0.7.0](https://github.com/purescript/purescript-strings/releases/tag/v0.7.0) - 2015-08-13 + +- Removed orphan (and incorrect) `Bounded Char` instance + +## [v0.6.0](https://github.com/purescript/purescript-strings/releases/tag/v0.6.0) - 2015-08-02 + +- Added `toLower` and `toUpper` to `Data.Char` +- `search` in `Data.String.Regex` now returns `Maybe` result rather than using -1 for failure +- Added test suite + +All updates by @LiamGoodacre + +## [v0.5.5](https://github.com/purescript/purescript-strings/releases/tag/v0.5.5) - 2015-07-28 + +Add `stripSuffix`. + +## [v0.5.4](https://github.com/purescript/purescript-strings/releases/tag/v0.5.4) - 2015-07-18 + +- Removed duplicate `Show` instance for `Char` (@anttih) + +## [v0.5.3](https://github.com/purescript/purescript-strings/releases/tag/v0.5.3) - 2015-07-10 + +Add `stripPrefix` (@hdgarrood) + +## [v0.5.2](https://github.com/purescript/purescript-strings/releases/tag/v0.5.2) - 2015-07-07 + +- Fixed `char` and `charCodeAt` in `Data.String.Unsafe` #36 (@stkb) + +## [v0.5.1](https://github.com/purescript/purescript-strings/releases/tag/v0.5.1) - 2015-07-06 + +- Fixed missing `count` implementation (@qxjit) + +## [v0.5.0](https://github.com/purescript/purescript-strings/releases/tag/v0.5.0) - 2015-06-30 + +This release works with versions 0.7.\* of the PureScript compiler. It will not work with older versions. If you are using an older version, you should require an older, compatible version of this library. 
+ +- Fixed various FFI exports (@sharkdp) +- Fixed `localeCompare` + +## [v0.4.5](https://github.com/purescript/purescript-strings/releases/tag/v0.4.5) - 2015-03-23 + +- Added `char` to `Data.String.Unsafe` (@brainrape) +- Functions in `Data.String.Unsafe` now throw errors immediately when given unacceptable inputs (@brainrape) + +## [v0.4.4](https://github.com/purescript/purescript-strings/releases/tag/v0.4.4) - 2015-03-22 + +- Updated docs + +## [v0.4.3](https://github.com/purescript/purescript-strings/releases/tag/v0.4.3) - 2015-02-18 + +- Added `noFlags` record for default regex flags (@fresheyeball) + +## [v0.4.2](https://github.com/purescript/purescript-strings/releases/tag/v0.4.2) - 2014-11-28 + +- Added `null`, `singleton`, `uncons`, `takeWhile`, and `dropWhile` to `Data.String` (@NightRa) + +## [v0.4.1](https://github.com/purescript/purescript-strings/releases/tag/v0.4.1) - 2014-11-06 + +- Use ternary operator in JavaScript output (@davidchambers) + +## [v0.4.0](https://github.com/purescript/purescript-strings/releases/tag/v0.4.0) - 2014-10-27 + +- Made `charCodeAt` safe, added unsafe versions of `charAt`, `charCodeAt` (@garyb) + +## [v0.3.3](https://github.com/purescript/purescript-strings/releases/tag/v0.3.3) - 2014-10-24 + +- Added `split` to `Data.String.Regex` (@davidchambers) + +## [v0.3.2](https://github.com/purescript/purescript-strings/releases/tag/v0.3.2) - 2014-10-16 + +- Added essential instances for `Char` (@jdegoes) + +## [v0.3.1](https://github.com/purescript/purescript-strings/releases/tag/v0.3.1) - 2014-10-15 + +- Fixed typo in `fromCharArray` FFI implementation (@jdegoes) + +## [v0.3.0](https://github.com/purescript/purescript-strings/releases/tag/v0.3.0) - 2014-10-14 + +- Introduced `Char` newtype and corresponding functions (@jdegoes) +- Made `charAt` safe - breaking change (@jdegoes) + +## [v0.2.1](https://github.com/purescript/purescript-strings/releases/tag/v0.2.1) - 2014-07-21 + +- Fix typo in FFI definition for `flags` (@garyb) + 
+## [v0.2.0](https://github.com/purescript/purescript-strings/releases/tag/v0.2.0) - 2014-07-20 + +- `Show` instance for `Regex` (@michaelficarra) +- `Regex` now has `RegexFlags` rather than a string for options (@michaelficarra) + +## [v0.1.3](https://github.com/purescript/purescript-strings/releases/tag/v0.1.3) - 2014-05-04 + +- Renamed `Data.String.Regex.replaceR` to `replace`, added `replace'` which uses a function to construct replacements for matches. + +## [v0.1.2](https://github.com/purescript/purescript-strings/releases/tag/v0.1.2) - 2014-04-30 + +- Added `indexOf'` and `lastIndexOf'` (paf31) + +## [v0.1.1](https://github.com/purescript/purescript-strings/releases/tag/v0.1.1) - 2014-04-27 + +- Swapped `joinWith` arguments for better style + +## [v0.1.0](https://github.com/purescript/purescript-strings/releases/tag/v0.1.0) - 2014-04-25 + +- Initial release diff --git a/Gruntfile.js b/Gruntfile.js deleted file mode 100644 index 3c41546..0000000 --- a/Gruntfile.js +++ /dev/null @@ -1,42 +0,0 @@ -module.exports = function(grunt) { - - "use strict"; - - grunt.initConfig({ - - libFiles: [ - "src/**/*.purs", - "bower_components/purescript-*/src/**/*.purs", - ], - - clean: ["output"], - - pscMake: ["<%=libFiles%>"], - dotPsci: ["<%=libFiles%>"], - docgen: { - readme: { - src: "src/**/*.purs", - dest: "README.md" - } - }, - jsvalidate: { - options:{ - globals: {}, - esprimaOptions: {}, - verbose: false - }, - targetName:{ - files:{ - src: ['output/Data.String/*.js'] - } - } - } - }); - - grunt.loadNpmTasks('grunt-jsvalidate'); - grunt.loadNpmTasks("grunt-contrib-clean"); - grunt.loadNpmTasks("grunt-purescript"); - - grunt.registerTask("make", ["pscMake", "dotPsci", "docgen", "jsvalidate"]); - grunt.registerTask("default", ["make"]); -}; diff --git a/LICENSE b/LICENSE index 58b0299..311379c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,20 +1,26 @@ -The MIT License (MIT) +Copyright 2018 PureScript -Copyright (c) 2014 PureScript +Redistribution and use in source and binary 
forms, with or without modification, +are permitted provided that the following conditions are met: -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md index 770a294..73292c1 100644 --- a/README.md +++ b/README.md @@ -1,128 +1,17 @@ -# Module Documentation +# purescript-strings -## Module Data.Char +[![Latest release](http://img.shields.io/github/release/purescript/purescript-strings.svg)](https://github.com/purescript/purescript-strings/releases) +[![Build status](https://github.com/purescript/purescript-strings/workflows/CI/badge.svg?branch=master)](https://github.com/purescript/purescript-strings/actions?query=workflow%3ACI+branch%3Amaster) +[![Pursuit](https://pursuit.purescript.org/packages/purescript-strings/badge)](https://pursuit.purescript.org/packages/purescript-strings) -### Types +String and char utility functions, regular expressions. 
- newtype Char +## Installation +``` +spago install strings +``` -### Type Class Instances +## Documentation - instance eqChar :: Eq Char - - instance ordChar :: Ord Char - - instance showChar :: Show Char - - -### Values - - charString :: Char -> String - - fromCharCode :: Number -> Char - - toCharCode :: Char -> Number - - -## Module Data.String - -### Values - - charAt :: Number -> String -> Maybe Char - - charCodeAt :: Number -> String -> Maybe Number - - count :: (Char -> Boolean) -> String -> Number - - drop :: Number -> String -> String - - dropWhile :: (Char -> Boolean) -> String -> String - - fromChar :: Char -> String - - fromCharArray :: [Char] -> String - - indexOf :: String -> String -> Number - - indexOf' :: String -> Number -> String -> Number - - joinWith :: String -> [String] -> String - - lastIndexOf :: String -> String -> Number - - lastIndexOf' :: String -> Number -> String -> Number - - length :: String -> Number - - localeCompare :: String -> String -> Number - - null :: String -> Boolean - - replace :: String -> String -> String -> String - - singleton :: Char -> String - - split :: String -> String -> [String] - - take :: Number -> String -> String - - takeWhile :: (Char -> Boolean) -> String -> String - - toCharArray :: String -> [Char] - - toLower :: String -> String - - toUpper :: String -> String - - trim :: String -> String - - uncons :: String -> Maybe { tail :: String, head :: Char } - - -## Module Data.String.Regex - -### Types - - data Regex :: * - - type RegexFlags = { unicode :: Boolean, sticky :: Boolean, multiline :: Boolean, ignoreCase :: Boolean, global :: Boolean } - - -### Type Class Instances - - instance showRegex :: Show Regex - - -### Values - - flags :: Regex -> RegexFlags - - match :: Regex -> String -> Maybe [String] - - parseFlags :: String -> RegexFlags - - regex :: String -> RegexFlags -> Regex - - renderFlags :: RegexFlags -> String - - replace :: Regex -> String -> String -> String - - replace' :: Regex -> 
(String -> [String] -> String) -> String -> String - - search :: Regex -> String -> Number - - source :: Regex -> String - - split :: Regex -> String -> [String] - - test :: Regex -> String -> Boolean - - -## Module Data.String.Unsafe - -### Values - - charAt :: Number -> String -> Char - - charCodeAt :: Number -> String -> Number \ No newline at end of file +Module documentation is [published on Pursuit](http://pursuit.purescript.org/packages/purescript-strings). diff --git a/bench/Main.purs b/bench/Main.purs new file mode 100644 index 0000000..87c1ca0 --- /dev/null +++ b/bench/Main.purs @@ -0,0 +1,57 @@ +module Bench.Main where + +import Prelude + +import Control.Monad.Eff (Eff) +import Control.Monad.Eff.Console (CONSOLE, log) +import Data.Array.NonEmpty (fromArray) +import Data.Maybe (fromJust) +import Data.String (toCharArray) +import Data.String.NonEmpty (fromFoldable1, fromNonEmptyCharArray) +import Partial.Unsafe (unsafePartial) +import Performance.Minibench (benchWith) + +main :: Eff (console :: CONSOLE) Unit +main = do + log "NonEmpty conversions" + log "======" + log "" + benchNonEmptyConversions + +benchNonEmptyConversions :: Eff (console :: CONSOLE) Unit +benchNonEmptyConversions = do + log "fromNonEmptyCharArray: short" + log "---" + benchFromNonEmptyCharArray + log "" + + log "fromFoldable1" + log "---" + benchFromFoldable1 + log "" + + where + + benchFromNonEmptyCharArray = do + log "short string" + bench \_ -> fromNonEmptyCharArray shortStringArr + + log "long string" + bench \_ -> fromNonEmptyCharArray longStringArr + + benchFromFoldable1 = do + log "short string" + bench \_ -> fromFoldable1 shortStringArr + + log "long string" + bench \_ -> fromFoldable1 longStringArr + + shortStringArr = unsafePartial fromJust $ fromArray + $ toCharArray "supercalifragilisticexpialidocious" + longStringArr = unsafePartial fromJust $ fromArray + $ toCharArray loremIpsum + + bench = benchWith 100000 + +loremIpsum :: String +loremIpsum = "Lorem ipsum dolor sit amet, 
consectetur adipiscing elit. Ut aliquet euismod ligula, vitae lacinia lorem imperdiet nec. Nulla volutpat ullamcorper mollis. Proin interdum quam a sem auctor, id tempus nisl pretium. Suspendisse potenti. Quisque ut libero consequat, suscipit sem a, malesuada nisi. Aliquam dictum odio mi, eu laoreet felis scelerisque non. Ut in odio vehicula, cursus augue sed, tincidunt lorem. Vestibulum consequat lectus eu commodo vulputate. Nam vitae faucibus ipsum. Curabitur sit amet neque sed est sagittis vehicula nec nec risus. Phasellus consectetur cursus malesuada. Vestibulum commodo lorem ut mauris mollis faucibus. Integer ut massa auctor, scelerisque nisi nec, rutrum nisl. Integer vel ex sem. Sed purus felis, molestie eget cursus vel, maximus ut augue. Curabitur nunc ligula, lobortis vitae vehicula a, volutpat nec sem. Phasellus non sapien ipsum. Mauris dolor justo, mollis at elit a, sollicitudin commodo quam. Curabitur posuere felis at nunc pharetra, eu convallis lectus dapibus. Aliquam ullamcorper porta fermentum. Donec at tellus metus. Donec pharetra tempor odio sit amet viverra. Nam vel metus libero. Vivamus maximus quis lacus id pharetra. Duis sed diam molestie, sodales leo id, pulvinar justo. In non augue tempor risus consectetur hendrerit. In libero nulla, elementum non ultrices eu, vehicula non ipsum. Maecenas in hendrerit tellus, sodales dignissim turpis. Ut odio diam, convallis in elit non, consequat gravida nisi. Cras egestas metus eleifend sapien efficitur, vel vulputate est porta. Aliquam posuere, magna nec bibendum luctus, quam risus efficitur sapien, id volutpat metus ex non lorem. Praesent velit eros, efficitur sed tortor quis, lobortis eleifend ligula. Sed tellus quam, aliquet vitae sagittis a, egestas eget massa. Etiam odio elit, hendrerit vel dui vel, fermentum pharetra neque. Curabitur quis mauris id lacus consectetur rhoncus non nec mauris. Mauris blandit tempor pretium. Donec non nisi finibus, lobortis dolor vitae, euismod arcu. 
Nullam scelerisque lacus in dolor volutpat mollis. Nunc vitae consectetur ligula, quis laoreet quam.Proin sit amet nisi eu orci hendrerit imperdiet vitae sit amet leo. Donec sodales id ante eget viverra. Nullam vitae elit in mauris accumsan feugiat id a velit. Nulla facilisi. Cras in turpis efficitur, consectetur justo quis, suscipit tortor. Sed tincidunt pellentesque sapien, in ultricies eros rhoncus sit amet. Integer blandit ornare lobortis. Duis dictum sit amet mauris sit amet cursus. Nullam nec nisl mauris. Praesent cursus imperdiet mi mattis luctus. Donec in tortor fermentum, efficitur turpis vel, facilisis augue. Integer egestas nisl et magna volutpat ornare. Donec pulvinar risus elit, eget viverra est feugiat in.Ut nec ante vestibulum neque pulvinar pretium sit amet eu nisi. Aliquam erat volutpat. Maecenas egestas nisi et mi congue, sed ultricies nibh posuere. Suspendisse potenti. Donec a nulla et velit elementum pretium. Pellentesque gravida imperdiet sem et varius. Praesent ac diam diam. Donec iaculis risus ex, ac eleifend sapien luctus ut. Fusce aliquet, lacus tincidunt porta malesuada, massa augue commodo nulla, ac malesuada tortor est sed eros. Praesent mattis, nisi eget ullamcorper vestibulum, lacus ante placerat metus, ac ullamcorper ante tellus vel nulla. Praesent vehicula in est sit amet varius. Sed facilisis felis sed sem porttitor rutrum. Etiam sollicitudin erat neque, id gravida metus scelerisque quis. Proin venenatis pharetra lectus ac auctor." 
diff --git a/bower.json b/bower.json index de4b307..85a17c5 100644 --- a/bower.json +++ b/bower.json @@ -1,23 +1,41 @@ { "name": "purescript-strings", "homepage": "https://github.com/purescript/purescript-strings", - "description": "String utility functions and regular expressions", - "keywords": [ - "purescript" - ], - "license": "MIT", + "license": "BSD-3-Clause", + "repository": { + "type": "git", + "url": "https://github.com/purescript/purescript-strings.git" + }, "ignore": [ "**/.*", "bower_components", "node_modules", "output", - "tests", - "tmp", + "test", "bower.json", - "Gruntfile.js", "package.json" ], "dependencies": { - "purescript-maybe": "~0.2.1" + "purescript-arrays": "^7.0.0", + "purescript-control": "^6.0.0", + "purescript-either": "^6.0.0", + "purescript-enums": "^6.0.1", + "purescript-foldable-traversable": "^6.0.0", + "purescript-gen": "^4.0.0", + "purescript-integers": "^6.0.0", + "purescript-maybe": "^6.0.0", + "purescript-newtype": "^5.0.0", + "purescript-nonempty": "^7.0.0", + "purescript-partial": "^4.0.0", + "purescript-prelude": "^6.0.0", + "purescript-tailrec": "^6.0.0", + "purescript-tuples": "^7.0.0", + "purescript-unfoldable": "^6.0.0", + "purescript-unsafe-coerce": "^6.0.0" + }, + "devDependencies": { + "purescript-assert": "^6.0.0", + "purescript-console": "^6.0.0", + "purescript-minibench": "^4.0.0" } } diff --git a/package.json b/package.json index c2867b1..cffd45e 100644 --- a/package.json +++ b/package.json @@ -1,9 +1,18 @@ { "private": true, - "dependencies": { - "grunt": "~0.4.4", - "grunt-purescript": "~0.5.1", - "grunt-contrib-clean": "~0.5.0", - "grunt-jsvalidate": "~0.2.2" + "scripts": { + "clean": "rimraf output && rimraf .pulp-cache", + "build": "eslint src && pulp build -- --censor-lib --strict", + "test": "pulp test && npm run test:run:without_codePointAt", + "test:run:without_codePointAt": "node -e \"delete String.prototype.codePointAt; import('./output/Test.Main/index.js').then(m => m.main());\"", + "bench:build": 
"purs compile 'bench/**/*.purs' 'src/**/*.purs' 'bower_components/*/src/**/*.purs'", + "bench:run": "node --expose-gc -e 'require(\"./output/Bench.Main/index.js\").main()'", + "bench": "npm run bench:build && npm run bench:run" + }, + "devDependencies": { + "eslint": "^7.15.0", + "pulp": "16.0.0-0", + "purescript-psa": "^0.8.2", + "rimraf": "^3.0.2" } } diff --git a/src/Data/Char.purs b/src/Data/Char.purs new file mode 100644 index 0000000..bb413b7 --- /dev/null +++ b/src/Data/Char.purs @@ -0,0 +1,16 @@ +-- | A type and functions for single characters. +module Data.Char + ( toCharCode + , fromCharCode + ) where + +import Data.Enum (fromEnum, toEnum) +import Data.Maybe (Maybe) + +-- | Returns the numeric Unicode value of the character. +toCharCode :: Char -> Int +toCharCode = fromEnum + +-- | Constructs a character from the given Unicode numeric value. +fromCharCode :: Int -> Maybe Char +fromCharCode = toEnum diff --git a/src/Data/Char/Char.purs b/src/Data/Char/Char.purs deleted file mode 100644 index 2fa34e3..0000000 --- a/src/Data/Char/Char.purs +++ /dev/null @@ -1,36 +0,0 @@ -module Data.Char - ( Char(), - charString, - fromCharCode, - toCharCode - ) where - - newtype Char = Char String - - charString :: Char -> String - charString (Char s) = s - - foreign import toCharCode - """ - function toCharCode(c) { - return c.charCodeAt(0); - } - """ :: Char -> Number - - foreign import fromCharCode - """ - function fromCharCode(c) { - return String.fromCharCode(c); - } - """ :: Number -> Char - - instance eqChar :: Eq Char where - (==) (Char a) (Char b) = a == b - - (/=) a b = not (a == b) - - instance ordChar :: Ord Char where - compare (Char a) (Char b) = a `compare` b - - instance showChar :: Show Char where - show (Char s) = "Char " ++ show s diff --git a/src/Data/Char/Gen.purs b/src/Data/Char/Gen.purs new file mode 100644 index 0000000..838ff29 --- /dev/null +++ b/src/Data/Char/Gen.purs @@ -0,0 +1,35 @@ +module Data.Char.Gen where + +import Prelude + +import 
Control.Monad.Gen (class MonadGen, chooseInt, oneOf) +import Data.Enum (toEnumWithDefaults) +import Data.NonEmpty ((:|)) + +-- | Generates a character of the Unicode basic multilingual plane (code units 0 to 65535, bounds inclusive). +genUnicodeChar :: forall m. MonadGen m => m Char +genUnicodeChar = toEnumWithDefaults bottom top <$> chooseInt 0 65535 + +-- | Generates a character in the ASCII character set, excluding control codes (codes 32 to 126; 127 is the DEL control code). +genAsciiChar :: forall m. MonadGen m => m Char +genAsciiChar = toEnumWithDefaults bottom top <$> chooseInt 32 126 + +-- | Generates a character in the ASCII character set. +genAsciiChar' :: forall m. MonadGen m => m Char +genAsciiChar' = toEnumWithDefaults bottom top <$> chooseInt 0 127 + +-- | Generates a character that is a numeric digit. +genDigitChar :: forall m. MonadGen m => m Char +genDigitChar = toEnumWithDefaults bottom top <$> chooseInt 48 57 + +-- | Generates a character from the basic latin alphabet. +genAlpha :: forall m. MonadGen m => m Char +genAlpha = oneOf (genAlphaLowercase :| [genAlphaUppercase]) + +-- | Generates a lowercase character from the basic latin alphabet. +genAlphaLowercase :: forall m. MonadGen m => m Char +genAlphaLowercase = toEnumWithDefaults bottom top <$> chooseInt 97 122 + +-- | Generates an uppercase character from the basic latin alphabet. +genAlphaUppercase :: forall m. 
MonadGen m => m Char +genAlphaUppercase = toEnumWithDefaults bottom top <$> chooseInt 65 90 diff --git a/src/Data/String.purs b/src/Data/String.purs index 09e2714..742f265 100644 --- a/src/Data/String.purs +++ b/src/Data/String.purs @@ -1,221 +1,10 @@ module Data.String - ( - charAt, - charCodeAt, - fromCharArray, - fromChar, - indexOf, - indexOf', - lastIndexOf, - lastIndexOf', - null, - uncons, - length, - singleton, - localeCompare, - replace, - count, - take, - takeWhile, - drop, - dropWhile, - split, - toCharArray, - toLower, - toUpper, - trim, - joinWith + ( module Data.String.Common + , module Data.String.CodePoints + , module Data.String.Pattern ) where - import Data.Maybe - import Data.Char - import Data.Function - import qualified Data.String.Unsafe as U +import Data.String.CodePoints - foreign import _charAt - """ - function _charAt(i, s, Just, Nothing) { - return i >= 0 && i < s.length ? Just(s.charAt(i)) : Nothing; - } - """ :: forall a. Fn4 Number String (a -> Maybe a) (Maybe a) (Maybe Char) - - charAt :: Number -> String -> Maybe Char - charAt n s = runFn4 _charAt n s Just Nothing - - fromChar :: Char -> String - fromChar = charString - - singleton :: Char -> String - singleton = fromChar - - foreign import _charCodeAt - """ - function _charCodeAt(i, s, Just, Nothing) { - return i >= 0 && i < s.length ? Just(s.charCodeAt(i)) : Nothing; - } - """ :: forall a. 
Fn4 Number String (a -> Maybe a) (Maybe a) (Maybe Number) - - charCodeAt :: Number -> String -> Maybe Number - charCodeAt n s = runFn4 _charCodeAt n s Just Nothing - - null :: String -> Boolean - null s = length s == 0 - - uncons :: String -> Maybe {head :: Char, tail :: String} - uncons s | null s = Nothing - uncons s = Just {head : U.charAt 0 s, tail : drop 1 s} - - takeWhile :: (Char -> Boolean) -> String -> String - takeWhile p s = take (count p s) s - - dropWhile :: (Char -> Boolean) -> String -> String - dropWhile p s = drop (count p s) s - - foreign import fromCharArray - """ - function fromCharArray(a) { - return a.join(''); - } - """ :: [Char] -> String - - foreign import indexOf - """ - function indexOf(x) { - return function(s) { - return s.indexOf(x); - }; - } - """ :: String -> String -> Number - - foreign import indexOf' - """ - function indexOf$prime(x) { - return function(startAt) { - return function(s) { - return s.indexOf(x, startAt); - }; - }; - } - """ :: String -> Number -> String -> Number - - foreign import lastIndexOf - """ - function lastIndexOf(x) { - return function(s) { - return s.lastIndexOf(x); - }; - } - """ :: String -> String -> Number - - foreign import lastIndexOf' - """ - function lastIndexOf$prime(x) { - return function(startAt) { - return function(s) { - return s.lastIndexOf(x, startAt); - }; - }; - } - """ :: String -> Number -> String -> Number - - foreign import length - """ - function length(s) { - return s.length; - } - """ :: String -> Number - - foreign import localeCompare - """ - function localeCompare(s1) { - return function(s2) { - return s1.localeCompare(s2); - }; - } - """ :: String -> String -> Number - - foreign import replace - """ - function replace(s1) { - return function(s2) { - return function(s3) { - return s3.replace(s1, s2); - }; - }; - } - """ :: String -> String -> String -> String - - foreign import take - """ - function take(n) { - return function(s) { - return s.substr(0, n); - }; - } - """ :: Number 
-> String -> String - - foreign import drop - """ - function drop(n) { - return function(s) { - return s.substr(n); - }; - } - """ :: Number -> String -> String - - foreign import count - """ - function count(p){ - return function(s){ - var i; - for(i = 0; i < s.length && p(s.charAt(i)); i++){}; - return i; - }; - } - """ :: (Char -> Boolean) -> String -> Number - - foreign import split - """ - function split(sep) { - return function(s) { - return s.split(sep); - }; - } - """ :: String -> String -> [String] - - foreign import toCharArray - """ - function toCharArray(s) { - return s.split(''); - } - """ :: String -> [Char] - - foreign import toLower - """ - function toLower(s) { - return s.toLowerCase(); - } - """ :: String -> String - - foreign import toUpper - """ - function toUpper(s) { - return s.toUpperCase(); - } - """ :: String -> String - - foreign import trim - """ - function trim(s) { - return s.trim(); - } - """ :: String -> String - - foreign import joinWith - """ - function joinWith(s) { - return function(xs) { - return xs.join(s); - }; - } - """ :: String -> [String] -> String +import Data.String.Common (joinWith, localeCompare, null, replace, replaceAll, split, toLower, toUpper, trim) +import Data.String.Pattern (Pattern(..), Replacement(..)) diff --git a/src/Data/String/CaseInsensitive.purs b/src/Data/String/CaseInsensitive.purs new file mode 100644 index 0000000..3783164 --- /dev/null +++ b/src/Data/String/CaseInsensitive.purs @@ -0,0 +1,22 @@ +module Data.String.CaseInsensitive where + +import Prelude + +import Data.Newtype (class Newtype) +import Data.String (toLower) + +-- | A newtype for case insensitive string comparisons and ordering. 
+newtype CaseInsensitiveString = CaseInsensitiveString String + +instance eqCaseInsensitiveString :: Eq CaseInsensitiveString where + eq (CaseInsensitiveString s1) (CaseInsensitiveString s2) = + toLower s1 == toLower s2 + +instance ordCaseInsensitiveString :: Ord CaseInsensitiveString where + compare (CaseInsensitiveString s1) (CaseInsensitiveString s2) = + compare (toLower s1) (toLower s2) + +instance showCaseInsensitiveString :: Show CaseInsensitiveString where + show (CaseInsensitiveString s) = "(CaseInsensitiveString " <> show s <> ")" + +derive instance newtypeCaseInsensitiveString :: Newtype CaseInsensitiveString _ diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js new file mode 100644 index 0000000..ebd9e39 --- /dev/null +++ b/src/Data/String/CodePoints.js @@ -0,0 +1,107 @@ +/* global Symbol */ + +var hasArrayFrom = typeof Array.from === "function"; +var hasStringIterator = + typeof Symbol !== "undefined" && + Symbol != null && + typeof Symbol.iterator !== "undefined" && + typeof String.prototype[Symbol.iterator] === "function"; +var hasFromCodePoint = typeof String.prototype.fromCodePoint === "function"; +var hasCodePointAt = typeof String.prototype.codePointAt === "function"; + +export const _unsafeCodePointAt0 = function (fallback) { + return hasCodePointAt + ? 
function (str) { return str.codePointAt(0); } + : fallback; +}; + +export const _codePointAt = function (fallback) { + return function (Just) { + return function (Nothing) { + return function (unsafeCodePointAt0) { + return function (index) { + return function (str) { + var length = str.length; + if (index < 0 || index >= length) return Nothing; + if (hasStringIterator) { + var iter = str[Symbol.iterator](); + for (var i = index;; --i) { + var o = iter.next(); + if (o.done) return Nothing; + if (i === 0) return Just(unsafeCodePointAt0(o.value)); + } + } + return fallback(index)(str); + }; + }; + }; + }; + }; +}; + +export const _countPrefix = function (fallback) { + return function (unsafeCodePointAt0) { + if (hasStringIterator) { + return function (pred) { + return function (str) { + var iter = str[Symbol.iterator](); + for (var cpCount = 0; ; ++cpCount) { + var o = iter.next(); + if (o.done) return cpCount; + var cp = unsafeCodePointAt0(o.value); + if (!pred(cp)) return cpCount; + } + }; + }; + } + return fallback; + }; +}; + +export const _fromCodePointArray = function (singleton) { + return hasFromCodePoint + ? function (cps) { + // Function.prototype.apply will fail for very large second parameters, + // so we don't use it for arrays with 10,000 or more entries. + if (cps.length < 10e3) { + return String.fromCodePoint.apply(String, cps); + } + return cps.map(singleton).join(""); + } + : function (cps) { + return cps.map(singleton).join(""); + }; +}; + +export const _singleton = function (fallback) { + return hasFromCodePoint ? 
String.fromCodePoint : fallback; +}; + +export const _take = function (fallback) { + return function (n) { + if (hasStringIterator) { + return function (str) { + var accum = ""; + var iter = str[Symbol.iterator](); + for (var i = 0; i < n; ++i) { + var o = iter.next(); + if (o.done) return accum; + accum += o.value; + } + return accum; + }; + } + return fallback(n); + }; +}; + +export const _toCodePointArray = function (fallback) { + return function (unsafeCodePointAt0) { + if (hasArrayFrom) { + return function (str) { + return Array.from(str, unsafeCodePointAt0); + }; + } + return fallback; + }; +}; diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs new file mode 100644 index 0000000..f5b5fd2 --- /dev/null +++ b/src/Data/String/CodePoints.purs @@ -0,0 +1,436 @@ +-- | These functions allow PureScript strings to be treated as if they were +-- | sequences of Unicode code points instead of their true underlying +-- | implementation (sequences of UTF-16 code units). For nearly all uses of +-- | strings, these functions should be preferred over the ones in +-- | `Data.String.CodeUnits`. 
+module Data.String.CodePoints + ( module Exports + , CodePoint + , codePointFromChar + , singleton + , fromCodePointArray + , toCodePointArray + , codePointAt + , uncons + , length + , countPrefix + , indexOf + , indexOf' + , lastIndexOf + , lastIndexOf' + , take + -- , takeRight + , takeWhile + , drop + -- , dropRight + , dropWhile + -- , slice + , splitAt + ) where + +import Prelude + +import Data.Array as Array +import Data.Enum (class BoundedEnum, class Enum, Cardinality(..), defaultPred, defaultSucc, fromEnum, toEnum, toEnumWithDefaults) +import Data.Int (hexadecimal, toStringAs) +import Data.Maybe (Maybe(..)) +import Data.String.CodeUnits (contains, stripPrefix, stripSuffix, startsWith, endsWith) as Exports +import Data.String.CodeUnits as CU +import Data.String.Common (toUpper) +import Data.String.Pattern (Pattern) +import Data.String.Unsafe as Unsafe +import Data.Tuple (Tuple(..)) +import Data.Unfoldable (unfoldr) + +-- | CodePoint is an `Int` bounded between `0` and `0x10FFFF`, corresponding to +-- | Unicode code points. +newtype CodePoint = CodePoint Int + +derive instance eqCodePoint :: Eq CodePoint +derive instance ordCodePoint :: Ord CodePoint + +instance showCodePoint :: Show CodePoint where + show (CodePoint i) = "(CodePoint 0x" <> toUpper (toStringAs hexadecimal i) <> ")" + +instance boundedCodePoint :: Bounded CodePoint where + bottom = CodePoint 0 + top = CodePoint 0x10FFFF + +instance enumCodePoint :: Enum CodePoint where + succ = defaultSucc toEnum fromEnum + pred = defaultPred toEnum fromEnum + +instance boundedEnumCodePoint :: BoundedEnum CodePoint where + cardinality = Cardinality (0x10FFFF + 1) + fromEnum (CodePoint n) = n + toEnum n + | n >= 0 && n <= 0x10FFFF = Just (CodePoint n) + | otherwise = Nothing + +-- | Creates a `CodePoint` from a given `Char`. 
+-- | +-- | ```purescript +-- | >>> codePointFromChar 'B' +-- | CodePoint 0x42 -- represents 'B' +-- | ``` +-- | +codePointFromChar :: Char -> CodePoint +codePointFromChar = fromEnum >>> CodePoint + +-- | Creates a string containing just the given code point. Operates in +-- | constant space and time. +-- | +-- | ```purescript +-- | >>> map singleton (toEnum 0x1D400) +-- | Just "𝐀" +-- | ``` +-- | +singleton :: CodePoint -> String +singleton = _singleton singletonFallback + +foreign import _singleton + :: (CodePoint -> String) + -> CodePoint + -> String + +singletonFallback :: CodePoint -> String +singletonFallback (CodePoint cp) | cp <= 0xFFFF = fromCharCode cp +singletonFallback (CodePoint cp) = + let lead = ((cp - 0x10000) / 0x400) + 0xD800 in + let trail = (cp - 0x10000) `mod` 0x400 + 0xDC00 in + fromCharCode lead <> fromCharCode trail + +-- | Creates a string from an array of code points. Operates in space and time +-- | linear to the length of the array. +-- | +-- | ```purescript +-- | >>> codePointArray = toCodePointArray "c 𝐀" +-- | >>> codePointArray +-- | [CodePoint 0x63, CodePoint 0x20, CodePoint 0x1D400] +-- | >>> fromCodePointArray codePointArray +-- | "c 𝐀" +-- | ``` +-- | +fromCodePointArray :: Array CodePoint -> String +fromCodePointArray = _fromCodePointArray singletonFallback + +foreign import _fromCodePointArray + :: (CodePoint -> String) + -> Array CodePoint + -> String + +-- | Creates an array of code points from a string. Operates in space and time +-- | linear to the length of the string. 
+-- | +-- | ```purescript +-- | >>> codePointArray = toCodePointArray "b 𝐀𝐀" +-- | >>> codePointArray +-- | [CodePoint 0x62, CodePoint 0x20, CodePoint 0x1D400, CodePoint 0x1D400] +-- | >>> map singleton codePointArray +-- | ["b", " ", "𝐀", "𝐀"] +-- | ``` +-- | +toCodePointArray :: String -> Array CodePoint +toCodePointArray = _toCodePointArray toCodePointArrayFallback unsafeCodePointAt0 + +foreign import _toCodePointArray + :: (String -> Array CodePoint) + -> (String -> CodePoint) + -> String + -> Array CodePoint + +toCodePointArrayFallback :: String -> Array CodePoint +toCodePointArrayFallback s = unfoldr unconsButWithTuple s + +unconsButWithTuple :: String -> Maybe (Tuple CodePoint String) +unconsButWithTuple s = (\{ head, tail } -> Tuple head tail) <$> uncons s + +-- | Returns the first code point of the string after dropping the given number +-- | of code points from the beginning, if there is such a code point. Operates +-- | in constant space and in time linear to the given index. +-- | +-- | ```purescript +-- | >>> codePointAt 1 "𝐀𝐀𝐀𝐀" +-- | Just (CodePoint 0x1D400) -- represents "𝐀" +-- | -- compare to Data.String: +-- | >>> charAt 1 "𝐀𝐀𝐀𝐀" +-- | Just 'οΏ½' +-- | ``` +-- | +codePointAt :: Int -> String -> Maybe CodePoint +codePointAt n _ | n < 0 = Nothing +codePointAt 0 "" = Nothing +codePointAt 0 s = Just (unsafeCodePointAt0 s) +codePointAt n s = _codePointAt codePointAtFallback Just Nothing unsafeCodePointAt0 n s + +foreign import _codePointAt + :: (Int -> String -> Maybe CodePoint) + -> (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> (String -> CodePoint) + -> Int + -> String + -> Maybe CodePoint + +codePointAtFallback :: Int -> String -> Maybe CodePoint +codePointAtFallback n s = case uncons s of + Just { head, tail } -> if n == 0 then Just head else codePointAtFallback (n - 1) tail + _ -> Nothing + +-- | Returns a record with the first code point and the remaining code points +-- | of the string. Returns `Nothing` if the string is empty. 
Operates in +-- | constant space and time. +-- | +-- | ```purescript +-- | >>> uncons "𝐀𝐀 c 𝐀" +-- | Just { head: CodePoint 0x1D400, tail: "𝐀 c 𝐀" } +-- | >>> uncons "" +-- | Nothing +-- | ``` +-- | +uncons :: String -> Maybe { head :: CodePoint, tail :: String } +uncons s = case CU.length s of + 0 -> Nothing + 1 -> Just { head: CodePoint (fromEnum (Unsafe.charAt 0 s)), tail: "" } + _ -> + let + cu0 = fromEnum (Unsafe.charAt 0 s) + cu1 = fromEnum (Unsafe.charAt 1 s) + in + if isLead cu0 && isTrail cu1 + then Just { head: unsurrogate cu0 cu1, tail: CU.drop 2 s } + else Just { head: CodePoint cu0, tail: CU.drop 1 s } + +-- | Returns the number of code points in the string. Operates in constant +-- | space and in time linear to the length of the string. +-- | +-- | ```purescript +-- | >>> length "b 𝐀𝐀 c 𝐀" +-- | 8 +-- | -- compare to Data.String.CodeUnits: +-- | >>> length "b 𝐀𝐀 c 𝐀" +-- | 11 +-- | ``` +-- | +length :: String -> Int +length = Array.length <<< toCodePointArray + +-- | Returns the number of code points in the leading sequence of code points +-- | which all match the given predicate. Operates in constant space and in +-- | time linear to the length of the string. +-- | +-- | ```purescript +-- | >>> countPrefix (\c -> fromEnum c == 0x1D400) "𝐀𝐀 b c 𝐀" +-- | 2 +-- | ``` +-- | +countPrefix :: (CodePoint -> Boolean) -> String -> Int +countPrefix = _countPrefix countFallback unsafeCodePointAt0 + +foreign import _countPrefix + :: ((CodePoint -> Boolean) -> String -> Int) + -> (String -> CodePoint) + -> (CodePoint -> Boolean) + -> String + -> Int + +countFallback :: (CodePoint -> Boolean) -> String -> Int +countFallback p s = countTail p s 0 + +countTail :: (CodePoint -> Boolean) -> String -> Int -> Int +countTail p s accum = case uncons s of + Just { head, tail } -> if p head then countTail p tail (accum + 1) else accum + _ -> accum + +-- | Returns the number of code points preceding the first match of the given +-- | pattern in the string. 
Returns `Nothing` when no matches are found. +-- | +-- | ```purescript +-- | >>> indexOf (Pattern "𝐀") "b 𝐀𝐀 c 𝐀" +-- | Just 2 +-- | >>> indexOf (Pattern "o") "b 𝐀𝐀 c 𝐀" +-- | Nothing +-- | ``` +-- | +indexOf :: Pattern -> String -> Maybe Int +indexOf p s = (\i -> length (CU.take i s)) <$> CU.indexOf p s + +-- | Returns the number of code points preceding the first match of the given +-- | pattern in the string. Pattern matches preceding the given index will be +-- | ignored. Returns `Nothing` when no matches are found. +-- | +-- | ```purescript +-- | >>> indexOf' (Pattern "𝐀") 4 "b 𝐀𝐀 c 𝐀" +-- | Just 7 +-- | >>> indexOf' (Pattern "o") 4 "b 𝐀𝐀 c 𝐀" +-- | Nothing +-- | ``` +-- | +indexOf' :: Pattern -> Int -> String -> Maybe Int +indexOf' p i s = + let s' = drop i s in + (\k -> i + length (CU.take k s')) <$> CU.indexOf p s' + +-- | Returns the number of code points preceding the last match of the given +-- | pattern in the string. Returns `Nothing` when no matches are found. +-- | +-- | ```purescript +-- | >>> lastIndexOf (Pattern "𝐀") "b 𝐀𝐀 c 𝐀" +-- | Just 7 +-- | >>> lastIndexOf (Pattern "o") "b 𝐀𝐀 c 𝐀" +-- | Nothing +-- | ``` +-- | +lastIndexOf :: Pattern -> String -> Maybe Int +lastIndexOf p s = (\i -> length (CU.take i s)) <$> CU.lastIndexOf p s + +-- | Returns the number of code points preceding the last match of the given +-- | pattern in the string. Pattern matches following the given index will be +-- | ignored. +-- | +-- | Giving a negative index is equivalent to giving 0 and giving an index +-- | greater than the number of code points in the string is equivalent to +-- | searching in the whole string. +-- | +-- | Returns `Nothing` when no matches are found. 
+-- | +-- | ```purescript +-- | >>> lastIndexOf' (Pattern "𝐀") (-1) "b 𝐀𝐀 c 𝐀" +-- | Nothing +-- | >>> lastIndexOf' (Pattern "𝐀") 0 "b 𝐀𝐀 c 𝐀" +-- | Nothing +-- | >>> lastIndexOf' (Pattern "𝐀") 5 "b 𝐀𝐀 c 𝐀" +-- | Just 3 +-- | >>> lastIndexOf' (Pattern "𝐀") 8 "b 𝐀𝐀 c 𝐀" +-- | Just 7 +-- | >>> lastIndexOf' (Pattern "o") 5 "b 𝐀𝐀 c 𝐀" +-- | Nothing +-- | ``` +-- | +lastIndexOf' :: Pattern -> Int -> String -> Maybe Int +lastIndexOf' p i s = + let i' = CU.length (take i s) in + (\k -> length (CU.take k s)) <$> CU.lastIndexOf' p i' s + +-- | Returns a string containing the given number of code points from the +-- | beginning of the given string. If the string does not have that many code +-- | points, returns the entire string. Operates in constant space and in time +-- | linear to the given number. +-- | +-- | ```purescript +-- | >>> take 3 "b 𝐀𝐀 c 𝐀" +-- | "b 𝐀" +-- | -- compare to Data.String: +-- | >>> take 3 "b 𝐀𝐀 c 𝐀" +-- | "b οΏ½" +-- | ``` +-- | +take :: Int -> String -> String +take = _take takeFallback + +foreign import _take :: (Int -> String -> String) -> Int -> String -> String + +takeFallback :: Int -> String -> String +takeFallback n _ | n < 1 = "" +takeFallback n s = case uncons s of + Just { head, tail } -> singleton head <> takeFallback (n - 1) tail + _ -> s + +-- | Returns a string containing the leading sequence of code points which all +-- | match the given predicate from the string. Operates in constant space and +-- | in time linear to the length of the string. +-- | +-- | ```purescript +-- | >>> takeWhile (\c -> fromEnum c == 0x1D400) "𝐀𝐀 b c 𝐀" +-- | "𝐀𝐀" +-- | ``` +-- | +takeWhile :: (CodePoint -> Boolean) -> String -> String +takeWhile p s = take (countPrefix p s) s + +-- | Drops the given number of code points from the beginning of the string. If +-- | the string does not have that many code points, returns the empty string. +-- | Operates in constant space and in time linear to the given number. 
+-- | +-- | ```purescript +-- | >>> drop 5 "𝐀𝐀 b c" +-- | "c" +-- | -- compared to Data.String: +-- | >>> drop 5 "𝐀𝐀 b c" +-- | "b c" -- because "𝐀" occupies 2 code units +-- | ``` +-- | +drop :: Int -> String -> String +drop n s = CU.drop (CU.length (take n s)) s + +-- | Drops the leading sequence of code points which all match the given +-- | predicate from the string. Operates in constant space and in time linear +-- | to the length of the string. +-- | +-- | ```purescript +-- | >>> dropWhile (\c -> fromEnum c == 0x1D400) "𝐀𝐀 b c 𝐀" +-- | " b c 𝐀" +-- | ``` +-- | +dropWhile :: (CodePoint -> Boolean) -> String -> String +dropWhile p s = drop (countPrefix p s) s + +-- | Splits a string into two substrings, where `before` contains the code +-- | points up to (but not including) the given index, and `after` contains the +-- | rest of the string, from that index on. +-- | +-- | ```purescript +-- | >>> splitAt 3 "b 𝐀𝐀 c 𝐀" +-- | { before: "b 𝐀", after: "𝐀 c 𝐀" } +-- | ``` +-- | +-- | Thus the length of `(splitAt i s).before` will equal either `i` or +-- | `length s`, if that is shorter. (Or if `i` is negative the length will be +-- | 0.) 
+-- | +-- | In code: +-- | ```purescript +-- | length (splitAt i s).before == min (max i 0) (length s) +-- | (splitAt i s).before <> (splitAt i s).after == s +-- | splitAt i s == {before: take i s, after: drop i s} +-- | ``` +splitAt :: Int -> String -> { before :: String, after :: String } +splitAt i s = + let before = take i s in + { before + -- inline drop i s to reuse the result of take i s + , after: CU.drop (CU.length before) s + } + +unsurrogate :: Int -> Int -> CodePoint +unsurrogate lead trail = CodePoint ((lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000) + +isLead :: Int -> Boolean +isLead cu = 0xD800 <= cu && cu <= 0xDBFF + +isTrail :: Int -> Boolean +isTrail cu = 0xDC00 <= cu && cu <= 0xDFFF + +fromCharCode :: Int -> String +fromCharCode = CU.singleton <<< toEnumWithDefaults bottom top + +-- WARN: this function expects the String parameter to be non-empty +unsafeCodePointAt0 :: String -> CodePoint +unsafeCodePointAt0 = _unsafeCodePointAt0 unsafeCodePointAt0Fallback + +foreign import _unsafeCodePointAt0 + :: (String -> CodePoint) + -> String + -> CodePoint + +unsafeCodePointAt0Fallback :: String -> CodePoint +unsafeCodePointAt0Fallback s = + let + cu0 = fromEnum (Unsafe.charAt 0 s) + in + if isLead cu0 && CU.length s > 1 + then + let cu1 = fromEnum (Unsafe.charAt 1 s) in + if isTrail cu1 then unsurrogate cu0 cu1 else CodePoint cu0 + else + CodePoint cu0 diff --git a/src/Data/String/CodeUnits.js b/src/Data/String/CodeUnits.js new file mode 100644 index 0000000..2608384 --- /dev/null +++ b/src/Data/String/CodeUnits.js @@ -0,0 +1,116 @@ +export const fromCharArray = function (a) { + return a.join(""); +}; + +export const toCharArray = function (s) { + return s.split(""); +}; + +export const singleton = function (c) { + return c; +}; + +export const _charAt = function (just) { + return function (nothing) { + return function (i) { + return function (s) { + return i >= 0 && i < s.length ? 
just(s.charAt(i)) : nothing; + }; + }; + }; +}; + +export const _toChar = function (just) { + return function (nothing) { + return function (s) { + return s.length === 1 ? just(s) : nothing; + }; + }; +}; + +export const length = function (s) { + return s.length; +}; + +export const countPrefix = function (p) { + return function (s) { + var i = 0; + while (i < s.length && p(s.charAt(i))) i++; + return i; + }; +}; + +export const _indexOf = function (just) { + return function (nothing) { + return function (x) { + return function (s) { + var i = s.indexOf(x); + return i === -1 ? nothing : just(i); + }; + }; + }; +}; + +export const _indexOfStartingAt = function (just) { + return function (nothing) { + return function (x) { + return function (startAt) { + return function (s) { + if (startAt < 0 || startAt > s.length) return nothing; + var i = s.indexOf(x, startAt); + return i === -1 ? nothing : just(i); + }; + }; + }; + }; +}; + +export const _lastIndexOf = function (just) { + return function (nothing) { + return function (x) { + return function (s) { + var i = s.lastIndexOf(x); + return i === -1 ? nothing : just(i); + }; + }; + }; +}; + +export const _lastIndexOfStartingAt = function (just) { + return function (nothing) { + return function (x) { + return function (startAt) { + return function (s) { + var i = s.lastIndexOf(x, startAt); + return i === -1 ? 
nothing : just(i); + }; + }; + }; + }; +}; + +export const take = function (n) { + return function (s) { + return s.substr(0, n); + }; +}; + +export const drop = function (n) { + return function (s) { + return s.substring(n); + }; +}; + +export const slice = function (b) { + return function (e) { + return function (s) { + return s.slice(b,e); + }; + }; +}; + +export const splitAt = function (i) { + return function (s) { + return { before: s.substring(0, i), after: s.substring(i) }; + }; +}; diff --git a/src/Data/String/CodeUnits.purs b/src/Data/String/CodeUnits.purs new file mode 100644 index 0000000..ec4d4cd --- /dev/null +++ b/src/Data/String/CodeUnits.purs @@ -0,0 +1,363 @@ +module Data.String.CodeUnits + ( stripPrefix + , stripSuffix + , contains + , singleton + , fromCharArray + , toCharArray + , charAt + , toChar + , uncons + , length + , countPrefix + , indexOf + , indexOf' + , lastIndexOf + , lastIndexOf' + , take + , takeRight + , takeWhile + , drop + , dropRight + , dropWhile + , slice + , splitAt + , startsWith + , endsWith + ) where + +import Prelude + +import Data.Maybe (Maybe(..), isJust) +import Data.String.Pattern (Pattern(..)) +import Data.String.Unsafe as U + +------------------------------------------------------------------------------- +-- `stripPrefix`, `stripSuffix`, `startsWith`, `endsWith`, and `contains` are +-- CodeUnit/CodePoint agnostic as they are based on patterns rather than +-- lengths/indices, but they need to be defined in here to avoid a circular +-- module dependency +------------------------------------------------------------------------------- + +-- | If the string starts with the given prefix, return the portion of the +-- | string left after removing it, as a `Just` value. Otherwise, return `Nothing`. 
+-- | +-- | ```purescript +-- | stripPrefix (Pattern "http:") "http://purescript.org" == Just "//purescript.org" +-- | stripPrefix (Pattern "http:") "https://purescript.org" == Nothing +-- | ``` +stripPrefix :: Pattern -> String -> Maybe String +stripPrefix (Pattern prefix) str = + let { before, after } = splitAt (length prefix) str in + if before == prefix then Just after else Nothing + +-- | If the string ends with the given suffix, return the portion of the +-- | string left after removing it, as a `Just` value. Otherwise, return +-- | `Nothing`. +-- | +-- | ```purescript +-- | stripSuffix (Pattern ".exe") "psc.exe" == Just "psc" +-- | stripSuffix (Pattern ".exe") "psc" == Nothing +-- | ``` +stripSuffix :: Pattern -> String -> Maybe String +stripSuffix (Pattern suffix) str = + let { before, after } = splitAt (length str - length suffix) str in + if after == suffix then Just before else Nothing + +-- | Checks whether the given string starts with the pattern. +-- | +-- | **NOTE**: if you also want to get the string stripped of the pattern, see +-- | `stripPrefix`. +-- | +-- | ```purescript +-- | startsWith (Pattern "foo") "foobar" == true +-- | startsWith (Pattern "bar") "foobar" == false +-- | ``` +startsWith :: Pattern -> String -> Boolean +startsWith pat = isJust <<< stripPrefix pat + +-- | Checks whether the given string ends with the pattern. +-- | +-- | **NOTE**: if you also want to get the string stripped of the pattern, see +-- | `stripSuffix`. +-- | +-- | ```purescript +-- | endsWith (Pattern "bar") "foobar" == true +-- | endsWith (Pattern "foo") "foobar" == false +-- | ``` +endsWith :: Pattern -> String -> Boolean +endsWith pat = isJust <<< stripSuffix pat + +-- | Checks whether the pattern appears in the given string. 
+-- | +-- | ```purescript +-- | contains (Pattern "needle") "haystack with needle" == true +-- | contains (Pattern "needle") "haystack" == false +-- | ``` +contains :: Pattern -> String -> Boolean +contains pat = isJust <<< indexOf pat + +------------------------------------------------------------------------------- +-- all functions past this point are CodeUnit specific +------------------------------------------------------------------------------- + +-- | Returns a string of length `1` containing the given character. +-- | +-- | ```purescript +-- | singleton 'l' == "l" +-- | ``` +-- | +foreign import singleton :: Char -> String + +-- | Converts an array of characters into a string. +-- | +-- | ```purescript +-- | fromCharArray ['H', 'e', 'l', 'l', 'o'] == "Hello" +-- | ``` +foreign import fromCharArray :: Array Char -> String + +-- | Converts the string into an array of characters. +-- | +-- | ```purescript +-- | toCharArray "Hello☺\n" == ['H','e','l','l','o','☺','\n'] +-- | ``` +foreign import toCharArray :: String -> Array Char + +-- | Returns the character at the given index, if the index is within bounds. +-- | +-- | ```purescript +-- | charAt 2 "Hello" == Just 'l' +-- | charAt 10 "Hello" == Nothing +-- | ``` +-- | +charAt :: Int -> String -> Maybe Char +charAt = _charAt Just Nothing + +foreign import _charAt + :: (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> Int + -> String + -> Maybe Char + +-- | Converts the string to a character, if the length of the string is +-- | exactly `1`. +-- | +-- | ```purescript +-- | toChar "l" == Just 'l' +-- | toChar "Hi" == Nothing -- since length is not 1 +-- | ``` +toChar :: String -> Maybe Char +toChar = _toChar Just Nothing + +foreign import _toChar + :: (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> String + -> Maybe Char + +-- | Returns the first character and the rest of the string, +-- | if the string is not empty. 
+-- | +-- | ```purescript +-- | uncons "" == Nothing +-- | uncons "Hello World" == Just { head: 'H', tail: "ello World" } +-- | ``` +-- | +uncons :: String -> Maybe { head :: Char, tail :: String } +uncons "" = Nothing +uncons s = Just { head: U.charAt zero s, tail: drop one s } + +-- | Returns the number of characters the string is composed of. +-- | +-- | ```purescript +-- | length "Hello World" == 11 +-- | +-- | length "𝐀A" == 3 +-- | -- compare to Data.String.CodePoints: +-- | length "𝐀A" == 2 +-- | ``` +-- | +foreign import length :: String -> Int + +-- | Returns the number of contiguous characters at the beginning +-- | of the string for which the predicate holds. +-- | +-- | ```purescript +-- | countPrefix (_ /= ' ') "Hello World" == 5 -- since length "Hello" == 5 +-- | ``` +-- | +foreign import countPrefix :: (Char -> Boolean) -> String -> Int + +-- | Returns the index of the first occurrence of the pattern in the +-- | given string. Returns `Nothing` if there is no match. +-- | +-- | ```purescript +-- | indexOf (Pattern "c") "abcdc" == Just 2 +-- | indexOf (Pattern "c") "aaa" == Nothing +-- | ``` +-- | +indexOf :: Pattern -> String -> Maybe Int +indexOf = _indexOf Just Nothing + +foreign import _indexOf + :: (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> Pattern + -> String + -> Maybe Int + +-- | Returns the index of the first occurrence of the pattern in the +-- | given string, starting at the specified index. Returns `Nothing` if there is +-- | no match. +-- | +-- | ```purescript +-- | indexOf' (Pattern "a") 2 "ababa" == Just 2 +-- | indexOf' (Pattern "a") 3 "ababa" == Just 4 +-- | ``` +-- | +indexOf' :: Pattern -> Int -> String -> Maybe Int +indexOf' = _indexOfStartingAt Just Nothing + +foreign import _indexOfStartingAt + :: (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> Pattern + -> Int + -> String + -> Maybe Int + +-- | Returns the index of the last occurrence of the pattern in the +-- | given string. 
Returns `Nothing` if there is no match. +-- | +-- | ```purescript +-- | lastIndexOf (Pattern "c") "abcdc" == Just 4 +-- | lastIndexOf (Pattern "c") "aaa" == Nothing +-- | ``` +-- | +lastIndexOf :: Pattern -> String -> Maybe Int +lastIndexOf = _lastIndexOf Just Nothing + +foreign import _lastIndexOf + :: (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> Pattern + -> String + -> Maybe Int + +-- | Returns the index of the last occurrence of the pattern in the +-- | given string, starting at the specified index and searching +-- | backwards towards the beginning of the string. +-- | +-- | Starting at a negative index is equivalent to starting at 0 and +-- | starting at an index greater than the string length is equivalent +-- | to searching in the whole string. +-- | +-- | Returns `Nothing` if there is no match. +-- | +-- | ```purescript +-- | lastIndexOf' (Pattern "a") (-1) "ababa" == Just 0 +-- | lastIndexOf' (Pattern "a") 1 "ababa" == Just 0 +-- | lastIndexOf' (Pattern "a") 3 "ababa" == Just 2 +-- | lastIndexOf' (Pattern "a") 4 "ababa" == Just 4 +-- | lastIndexOf' (Pattern "a") 5 "ababa" == Just 4 +-- | ``` +-- | +lastIndexOf' :: Pattern -> Int -> String -> Maybe Int +lastIndexOf' = _lastIndexOfStartingAt Just Nothing + +foreign import _lastIndexOfStartingAt + :: (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> Pattern + -> Int + -> String + -> Maybe Int + +-- | Returns the first `n` characters of the string. +-- | +-- | ```purescript +-- | take 5 "Hello World" == "Hello" +-- | ``` +-- | +foreign import take :: Int -> String -> String + +-- | Returns the last `n` characters of the string. +-- | +-- | ```purescript +-- | takeRight 5 "Hello World" == "World" +-- | ``` +-- | +takeRight :: Int -> String -> String +takeRight i s = drop (length s - i) s + +-- | Returns the longest prefix (possibly empty) of characters that satisfy +-- | the predicate. 
+-- | +-- | ```purescript +-- | takeWhile (_ /= ':') "http://purescript.org" == "http" +-- | ``` +-- | +takeWhile :: (Char -> Boolean) -> String -> String +takeWhile p s = take (countPrefix p s) s + +-- | Returns the string without the first `n` characters. +-- | +-- | ```purescript +-- | drop 6 "Hello World" == "World" +-- | ``` +-- | +foreign import drop :: Int -> String -> String + +-- | Returns the string without the last `n` characters. +-- | +-- | ```purescript +-- | dropRight 6 "Hello World" == "Hello" +-- | ``` +-- | +dropRight :: Int -> String -> String +dropRight i s = take (length s - i) s + +-- | Returns the suffix remaining after `takeWhile`. +-- | +-- | ```purescript +-- | dropWhile (_ /= '.') "Test.purs" == ".purs" +-- | ``` +-- | +dropWhile :: (Char -> Boolean) -> String -> String +dropWhile p s = drop (countPrefix p s) s + +-- | Returns the substring at indices `[begin, end)`. +-- | If either index is negative, it is normalised to `length s - index`, +-- | where `s` is the input string. `""` is returned if either +-- | index is out of bounds or if `begin > end` after normalisation. +-- | +-- | ```purescript +-- | slice 0 0 "purescript" == "" +-- | slice 0 1 "purescript" == "p" +-- | slice 3 6 "purescript" == "esc" +-- | slice (-4) (-1) "purescript" == "rip" +-- | slice (-4) 3 "purescript" == "" +-- | ``` +foreign import slice :: Int -> Int -> String -> String + +-- | Splits a string into two substrings, where `before` contains the +-- | characters up to (but not including) the given index, and `after` contains +-- | the rest of the string, from that index on. +-- | +-- | ```purescript +-- | splitAt 2 "Hello World" == { before: "He", after: "llo World"} +-- | splitAt 10 "Hi" == { before: "Hi", after: ""} +-- | ``` +-- | +-- | Thus the length of `(splitAt i s).before` will equal either `i` or +-- | `length s`, if that is shorter. (Or if `i` is negative the length will be +-- | 0.) 
+-- | +-- | In code: +-- | ```purescript +-- | length (splitAt i s).before == min (max i 0) (length s) +-- | (splitAt i s).before <> (splitAt i s).after == s +-- | splitAt i s == {before: take i s, after: drop i s} +-- | ``` +foreign import splitAt :: Int -> String -> { before :: String, after :: String } diff --git a/src/Data/String/Common.js b/src/Data/String/Common.js new file mode 100644 index 0000000..5693585 --- /dev/null +++ b/src/Data/String/Common.js @@ -0,0 +1,52 @@ +export const _localeCompare = function (lt) { + return function (eq) { + return function (gt) { + return function (s1) { + return function (s2) { + var result = s1.localeCompare(s2); + return result < 0 ? lt : result > 0 ? gt : eq; + }; + }; + }; + }; +}; + +export const replace = function (s1) { + return function (s2) { + return function (s3) { + return s3.replace(s1, s2); + }; + }; +}; + +export const replaceAll = function (s1) { + return function (s2) { + return function (s3) { + return s3.replace(new RegExp(s1.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&"), "g"), s2); // eslint-disable-line no-useless-escape + }; + }; +}; + +export const split = function (sep) { + return function (s) { + return s.split(sep); + }; +}; + +export const toLower = function (s) { + return s.toLowerCase(); +}; + +export const toUpper = function (s) { + return s.toUpperCase(); +}; + +export const trim = function (s) { + return s.trim(); +}; + +export const joinWith = function (s) { + return function (xs) { + return xs.join(s); + }; +}; diff --git a/src/Data/String/Common.purs b/src/Data/String/Common.purs new file mode 100644 index 0000000..9e3132e --- /dev/null +++ b/src/Data/String/Common.purs @@ -0,0 +1,96 @@ +module Data.String.Common + ( null + , localeCompare + , replace + , replaceAll + , split + , toLower + , toUpper + , trim + , joinWith + ) where + +import Prelude + +import Data.String.Pattern (Pattern, Replacement) + +-- | Returns `true` if the given string is empty. 
+-- | +-- | ```purescript +-- | null "" == true +-- | null "Hi" == false +-- | ``` +null :: String -> Boolean +null s = s == "" + +-- | Compare two strings in a locale-aware fashion. This is in contrast to +-- | the `Ord` instance on `String` which treats strings as arrays of code +-- | units: +-- | +-- | ```purescript +-- | "ä" `localeCompare` "b" == LT +-- | "ä" `compare` "b" == GT +-- | ``` +localeCompare :: String -> String -> Ordering +localeCompare = _localeCompare LT EQ GT + +foreign import _localeCompare + :: Ordering + -> Ordering + -> Ordering + -> String + -> String + -> Ordering + +-- | Replaces the first occurrence of the pattern with the replacement string. +-- | +-- | ```purescript +-- | replace (Pattern "<=") (Replacement "≤") "a <= b <= c" == "a ≤ b <= c" +-- | ``` +foreign import replace :: Pattern -> Replacement -> String -> String + +-- | Replaces all occurrences of the pattern with the replacement string. +-- | +-- | ```purescript +-- | replaceAll (Pattern "<=") (Replacement "≤") "a <= b <= c" == "a ≤ b ≤ c" +-- | ``` +foreign import replaceAll :: Pattern -> Replacement -> String -> String + +-- | Returns the substrings of the second string separated along occurrences +-- | of the first string. +-- | +-- | ```purescript +-- | split (Pattern " ") "hello world" == ["hello", "world"] +-- | ``` +foreign import split :: Pattern -> String -> Array String + +-- | Returns the argument converted to lowercase. +-- | +-- | ```purescript +-- | toLower "hElLo" == "hello" +-- | ``` +foreign import toLower :: String -> String + +-- | Returns the argument converted to uppercase. +-- | +-- | ```purescript +-- | toUpper "Hello" == "HELLO" +-- | ``` +foreign import toUpper :: String -> String + +-- | Removes whitespace from the beginning and end of a string, including +-- | [whitespace characters](http://www.ecma-international.org/ecma-262/5.1/#sec-7.2) +-- | and [line terminators](http://www.ecma-international.org/ecma-262/5.1/#sec-7.3).
+-- | +-- | ```purescript +-- | trim " Hello \n World\n\t " == "Hello \n World" +-- | ``` +foreign import trim :: String -> String + +-- | Joins the strings in the array together, inserting the first argument +-- | as separator between them. +-- | +-- | ```purescript +-- | joinWith ", " ["apple", "banana", "orange"] == "apple, banana, orange" +-- | ``` +foreign import joinWith :: String -> Array String -> String diff --git a/src/Data/String/Gen.purs b/src/Data/String/Gen.purs new file mode 100644 index 0000000..845b5e8 --- /dev/null +++ b/src/Data/String/Gen.purs @@ -0,0 +1,43 @@ +module Data.String.Gen where + +import Prelude + +import Control.Monad.Gen (class MonadGen, chooseInt, unfoldable, sized, resize) +import Control.Monad.Rec.Class (class MonadRec) +import Data.Char.Gen as CG +import Data.String.CodeUnits as SCU + +-- | Generates a string using the specified character generator. +genString :: forall m. MonadRec m => MonadGen m => m Char -> m String +genString genChar = sized \size -> do + newSize <- chooseInt 1 (max 1 size) + resize (const newSize) $ SCU.fromCharArray <$> unfoldable genChar + +-- | Generates a string using characters from the Unicode basic multilingual +-- | plane. +genUnicodeString :: forall m. MonadRec m => MonadGen m => m String +genUnicodeString = genString CG.genUnicodeChar + +-- | Generates a string using the ASCII character set, excluding control codes. +genAsciiString :: forall m. MonadRec m => MonadGen m => m String +genAsciiString = genString CG.genAsciiChar + +-- | Generates a string using the ASCII character set. +genAsciiString' :: forall m. MonadRec m => MonadGen m => m String +genAsciiString' = genString CG.genAsciiChar' + +-- | Generates a string made up of numeric digits. +genDigitString :: forall m. MonadRec m => MonadGen m => m String +genDigitString = genString CG.genDigitChar + +-- | Generates a string using characters from the basic Latin alphabet. +genAlphaString :: forall m.
MonadRec m => MonadGen m => m String +genAlphaString = genString CG.genAlpha + +-- | Generates a string using lowercase characters from the basic Latin alphabet. +genAlphaLowercaseString :: forall m. MonadRec m => MonadGen m => m String +genAlphaLowercaseString = genString CG.genAlphaLowercase + +-- | Generates a string using uppercase characters from the basic Latin alphabet. +genAlphaUppercaseString :: forall m. MonadRec m => MonadGen m => m String +genAlphaUppercaseString = genString CG.genAlphaUppercase diff --git a/src/Data/String/NonEmpty.purs b/src/Data/String/NonEmpty.purs new file mode 100644 index 0000000..72e10b3 --- /dev/null +++ b/src/Data/String/NonEmpty.purs @@ -0,0 +1,9 @@ +module Data.String.NonEmpty + ( module Data.String.Pattern + , module Data.String.NonEmpty.Internal + , module Data.String.NonEmpty.CodePoints + ) where + +import Data.String.NonEmpty.Internal (NonEmptyString, class MakeNonEmpty, NonEmptyReplacement(..), appendString, contains, fromString, join1With, joinWith, joinWith1, localeCompare, nes, prependString, replace, replaceAll, stripPrefix, stripSuffix, startsWith, endsWith, toLower, toString, toUpper, trim, unsafeFromString) +import Data.String.Pattern (Pattern(..)) +import Data.String.NonEmpty.CodePoints diff --git a/src/Data/String/NonEmpty/CaseInsensitive.purs b/src/Data/String/NonEmpty/CaseInsensitive.purs new file mode 100644 index 0000000..d1c1719 --- /dev/null +++ b/src/Data/String/NonEmpty/CaseInsensitive.purs @@ -0,0 +1,22 @@ +module Data.String.NonEmpty.CaseInsensitive where + +import Prelude + +import Data.Newtype (class Newtype) +import Data.String.NonEmpty (NonEmptyString, toLower) + +-- | A newtype for case insensitive string comparisons and ordering. 
+newtype CaseInsensitiveNonEmptyString = CaseInsensitiveNonEmptyString NonEmptyString + +instance eqCaseInsensitiveNonEmptyString :: Eq CaseInsensitiveNonEmptyString where + eq (CaseInsensitiveNonEmptyString s1) (CaseInsensitiveNonEmptyString s2) = + toLower s1 == toLower s2 + +instance ordCaseInsensitiveNonEmptyString :: Ord CaseInsensitiveNonEmptyString where + compare (CaseInsensitiveNonEmptyString s1) (CaseInsensitiveNonEmptyString s2) = + compare (toLower s1) (toLower s2) + +instance showCaseInsensitiveNonEmptyString :: Show CaseInsensitiveNonEmptyString where + show (CaseInsensitiveNonEmptyString s) = "(CaseInsensitiveNonEmptyString " <> show s <> ")" + +derive instance newtypeCaseInsensitiveNonEmptyString :: Newtype CaseInsensitiveNonEmptyString _ diff --git a/src/Data/String/NonEmpty/CodePoints.purs b/src/Data/String/NonEmpty/CodePoints.purs new file mode 100644 index 0000000..7b5328a --- /dev/null +++ b/src/Data/String/NonEmpty/CodePoints.purs @@ -0,0 +1,138 @@ +module Data.String.NonEmpty.CodePoints + ( fromCodePointArray + , fromNonEmptyCodePointArray + , singleton + , cons + , snoc + , fromFoldable1 + , toCodePointArray + , toNonEmptyCodePointArray + , codePointAt + , indexOf + , indexOf' + , lastIndexOf + , lastIndexOf' + , uncons + , length + , take + -- takeRight + , takeWhile + , drop + -- dropRight + , dropWhile + , countPrefix + , splitAt + ) where + +import Prelude + +import Data.Array.NonEmpty (NonEmptyArray) +import Data.Array.NonEmpty as NEA +import Data.Maybe (Maybe(..), fromJust) +import Data.Semigroup.Foldable (class Foldable1) +import Data.Semigroup.Foldable as F1 +import Data.String.CodePoints (CodePoint) +import Data.String.CodePoints as CP +import Data.String.NonEmpty.Internal (NonEmptyString(..), fromString) +import Data.String.Pattern (Pattern) +import Partial.Unsafe (unsafePartial) + +-- For internal use only. Do not export. +toNonEmptyString :: String -> NonEmptyString +toNonEmptyString = NonEmptyString + +-- For internal use only. 
Do not export. +fromNonEmptyString :: NonEmptyString -> String +fromNonEmptyString (NonEmptyString s) = s + +-- For internal use only. Do not export. +liftS :: forall r. (String -> r) -> NonEmptyString -> r +liftS f (NonEmptyString s) = f s + +fromCodePointArray :: Array CodePoint -> Maybe NonEmptyString +fromCodePointArray = case _ of + [] -> Nothing + cs -> Just (toNonEmptyString (CP.fromCodePointArray cs)) + +fromNonEmptyCodePointArray :: NonEmptyArray CodePoint -> NonEmptyString +fromNonEmptyCodePointArray = unsafePartial fromJust <<< fromCodePointArray <<< NEA.toArray + +singleton :: CodePoint -> NonEmptyString +singleton = toNonEmptyString <<< CP.singleton + +cons :: CodePoint -> String -> NonEmptyString +cons c s = toNonEmptyString (CP.singleton c <> s) + +snoc :: CodePoint -> String -> NonEmptyString +snoc c s = toNonEmptyString (s <> CP.singleton c) + +fromFoldable1 :: forall f. Foldable1 f => f CodePoint -> NonEmptyString +fromFoldable1 = F1.foldMap1 singleton + +toCodePointArray :: NonEmptyString -> Array CodePoint +toCodePointArray = CP.toCodePointArray <<< fromNonEmptyString + +toNonEmptyCodePointArray :: NonEmptyString -> NonEmptyArray CodePoint +toNonEmptyCodePointArray = unsafePartial fromJust <<< NEA.fromArray <<< toCodePointArray + +codePointAt :: Int -> NonEmptyString -> Maybe CodePoint +codePointAt = liftS <<< CP.codePointAt + +indexOf :: Pattern -> NonEmptyString -> Maybe Int +indexOf = liftS <<< CP.indexOf + +indexOf' :: Pattern -> Int -> NonEmptyString -> Maybe Int +indexOf' pat = liftS <<< CP.indexOf' pat + +lastIndexOf :: Pattern -> NonEmptyString -> Maybe Int +lastIndexOf = liftS <<< CP.lastIndexOf + +lastIndexOf' :: Pattern -> Int -> NonEmptyString -> Maybe Int +lastIndexOf' pat = liftS <<< CP.lastIndexOf' pat + +uncons :: NonEmptyString -> { head :: CodePoint, tail :: Maybe NonEmptyString } +uncons nes = + let + s = fromNonEmptyString nes + in + { head: unsafePartial fromJust (CP.codePointAt 0 s) + , tail: fromString (CP.drop 1 s) + } + 
+length :: NonEmptyString -> Int +length = CP.length <<< fromNonEmptyString + +take :: Int -> NonEmptyString -> Maybe NonEmptyString +take i nes = + let + s = fromNonEmptyString nes + in + if i < 1 + then Nothing + else Just (toNonEmptyString (CP.take i s)) + +takeWhile :: (CodePoint -> Boolean) -> NonEmptyString -> Maybe NonEmptyString +takeWhile f = fromString <<< liftS (CP.takeWhile f) + +drop :: Int -> NonEmptyString -> Maybe NonEmptyString +drop i nes = + let + s = fromNonEmptyString nes + in + if i >= CP.length s + then Nothing + else Just (toNonEmptyString (CP.drop i s)) + +dropWhile :: (CodePoint -> Boolean) -> NonEmptyString -> Maybe NonEmptyString +dropWhile f = fromString <<< liftS (CP.dropWhile f) + +countPrefix :: (CodePoint -> Boolean) -> NonEmptyString -> Int +countPrefix = liftS <<< CP.countPrefix + +splitAt + :: Int + -> NonEmptyString + -> { before :: Maybe NonEmptyString, after :: Maybe NonEmptyString } +splitAt i nes = + case CP.splitAt i (fromNonEmptyString nes) of + { before, after } -> { before: fromString before, after: fromString after } diff --git a/src/Data/String/NonEmpty/CodeUnits.purs b/src/Data/String/NonEmpty/CodeUnits.purs new file mode 100644 index 0000000..4e97244 --- /dev/null +++ b/src/Data/String/NonEmpty/CodeUnits.purs @@ -0,0 +1,304 @@ +module Data.String.NonEmpty.CodeUnits + ( fromCharArray + , fromNonEmptyCharArray + , singleton + , cons + , snoc + , fromFoldable1 + , toCharArray + , toNonEmptyCharArray + , charAt + , toChar + , indexOf + , indexOf' + , lastIndexOf + , lastIndexOf' + , uncons + , length + , take + , takeRight + , takeWhile + , drop + , dropRight + , dropWhile + , countPrefix + , splitAt + ) where + +import Prelude + +import Data.Array.NonEmpty (NonEmptyArray) +import Data.Array.NonEmpty as NEA +import Data.Maybe (Maybe(..), fromJust) +import Data.Semigroup.Foldable (class Foldable1) +import Data.Semigroup.Foldable as F1 +import Data.String.CodeUnits as CU +import Data.String.NonEmpty.Internal 
(NonEmptyString(..), fromString) +import Data.String.Pattern (Pattern) +import Data.String.Unsafe as U +import Partial.Unsafe (unsafePartial) + +-- For internal use only. Do not export. +toNonEmptyString :: String -> NonEmptyString +toNonEmptyString = NonEmptyString + +-- For internal use only. Do not export. +fromNonEmptyString :: NonEmptyString -> String +fromNonEmptyString (NonEmptyString s) = s + +-- For internal use only. Do not export. +liftS :: forall r. (String -> r) -> NonEmptyString -> r +liftS f (NonEmptyString s) = f s + +-- | Creates a `NonEmptyString` from a character array `String`, returning +-- | `Nothing` if the input is empty. +-- | +-- | ```purescript +-- | fromCharArray [] = Nothing +-- | fromCharArray ['a', 'b', 'c'] = Just (NonEmptyString "abc") +-- | ``` +fromCharArray :: Array Char -> Maybe NonEmptyString +fromCharArray = case _ of + [] -> Nothing + cs -> Just (toNonEmptyString (CU.fromCharArray cs)) + +fromNonEmptyCharArray :: NonEmptyArray Char -> NonEmptyString +fromNonEmptyCharArray = unsafePartial fromJust <<< fromCharArray <<< NEA.toArray + +-- | Creates a `NonEmptyString` from a character. +singleton :: Char -> NonEmptyString +singleton = toNonEmptyString <<< CU.singleton + +-- | Creates a `NonEmptyString` from a string by prepending a character. +-- | +-- | ```purescript +-- | cons 'a' "bc" = NonEmptyString "abc" +-- | cons 'a' "" = NonEmptyString "a" +-- | ``` +cons :: Char -> String -> NonEmptyString +cons c s = toNonEmptyString (CU.singleton c <> s) + +-- | Creates a `NonEmptyString` from a string by appending a character. +-- | +-- | ```purescript +-- | snoc 'c' "ab" = NonEmptyString "abc" +-- | snoc 'a' "" = NonEmptyString "a" +-- | ``` +snoc :: Char -> String -> NonEmptyString +snoc c s = toNonEmptyString (s <> CU.singleton c) + +-- | Creates a `NonEmptyString` from a `Foldable1` container carrying +-- | characters. +fromFoldable1 :: forall f. 
Foldable1 f => f Char -> NonEmptyString +fromFoldable1 = F1.foldMap1 singleton + +-- | Converts the `NonEmptyString` into an array of characters. +-- | +-- | ```purescript +-- | toCharArray (NonEmptyString "Hello☺\n") == ['H','e','l','l','o','☺','\n'] +-- | ``` +toCharArray :: NonEmptyString -> Array Char +toCharArray = CU.toCharArray <<< fromNonEmptyString + +-- | Converts the `NonEmptyString` into a non-empty array of characters. +toNonEmptyCharArray :: NonEmptyString -> NonEmptyArray Char +toNonEmptyCharArray = unsafePartial fromJust <<< NEA.fromArray <<< toCharArray + +-- | Returns the character at the given index, if the index is within bounds. +-- | +-- | ```purescript +-- | charAt 2 (NonEmptyString "Hello") == Just 'l' +-- | charAt 10 (NonEmptyString "Hello") == Nothing +-- | ``` +charAt :: Int -> NonEmptyString -> Maybe Char +charAt = liftS <<< CU.charAt + +-- | Converts the `NonEmptyString` to a character, if the length of the string +-- | is exactly `1`. +-- | +-- | ```purescript +-- | toChar "H" == Just 'H' +-- | toChar "Hi" == Nothing +-- | ``` +toChar :: NonEmptyString -> Maybe Char +toChar = CU.toChar <<< fromNonEmptyString + +-- | Returns the index of the first occurrence of the pattern in the +-- | given string. Returns `Nothing` if there is no match. +-- | +-- | ```purescript +-- | indexOf (Pattern "c") (NonEmptyString "abcdc") == Just 2 +-- | indexOf (Pattern "c") (NonEmptyString "aaa") == Nothing +-- | ``` +indexOf :: Pattern -> NonEmptyString -> Maybe Int +indexOf = liftS <<< CU.indexOf + +-- | Returns the index of the first occurrence of the pattern in the +-- | given string, starting at the specified index. Returns `Nothing` if there is +-- | no match. 
+-- | +-- | ```purescript +-- | indexOf' (Pattern "a") 2 (NonEmptyString "ababa") == Just 2 +-- | indexOf' (Pattern "a") 3 (NonEmptyString "ababa") == Just 4 +-- | ``` +indexOf' :: Pattern -> Int -> NonEmptyString -> Maybe Int +indexOf' pat = liftS <<< CU.indexOf' pat + +-- | Returns the index of the last occurrence of the pattern in the +-- | given string. Returns `Nothing` if there is no match. +-- | +-- | ```purescript +-- | lastIndexOf (Pattern "c") (NonEmptyString "abcdc") == Just 4 +-- | lastIndexOf (Pattern "c") (NonEmptyString "aaa") == Nothing +-- | ``` +lastIndexOf :: Pattern -> NonEmptyString -> Maybe Int +lastIndexOf = liftS <<< CU.lastIndexOf + +-- | Returns the index of the last occurrence of the pattern in the +-- | given string, starting at the specified index and searching +-- | backwards towards the beginning of the string. +-- | +-- | Starting at a negative index is equivalent to starting at 0 and +-- | starting at an index greater than the string length is equivalent +-- | to searching in the whole string. +-- | +-- | Returns `Nothing` if there is no match. +-- | +-- | ```purescript +-- | lastIndexOf' (Pattern "a") (-1) (NonEmptyString "ababa") == Just 0 +-- | lastIndexOf' (Pattern "a") 1 (NonEmptyString "ababa") == Just 0 +-- | lastIndexOf' (Pattern "a") 3 (NonEmptyString "ababa") == Just 2 +-- | lastIndexOf' (Pattern "a") 4 (NonEmptyString "ababa") == Just 4 +-- | lastIndexOf' (Pattern "a") 5 (NonEmptyString "ababa") == Just 4 +-- | ``` +lastIndexOf' :: Pattern -> Int -> NonEmptyString -> Maybe Int +lastIndexOf' pat = liftS <<< CU.lastIndexOf' pat + +-- | Returns the first character and the rest of the string. 
+-- | +-- | ```purescript +-- | uncons (NonEmptyString "a") == { head: 'a', tail: Nothing } +-- | uncons (NonEmptyString "Hello World") == { head: 'H', tail: Just (NonEmptyString "ello World") } +-- | ``` +uncons :: NonEmptyString -> { head :: Char, tail :: Maybe NonEmptyString } +uncons nes = + let + s = fromNonEmptyString nes + in + { head: U.charAt 0 s + , tail: fromString (CU.drop 1 s) + } + +-- | Returns the number of characters the string is composed of. +-- | +-- | ```purescript +-- | length (NonEmptyString "Hello World") == 11 +-- | ``` +length :: NonEmptyString -> Int +length = CU.length <<< fromNonEmptyString + +-- | Returns the first `n` characters of the string. Returns `Nothing` if `n` is +-- | less than 1. +-- | +-- | ```purescript +-- | take 5 (NonEmptyString "Hello World") == Just (NonEmptyString "Hello") +-- | take 0 (NonEmptyString "Hello World") == Nothing +-- | ``` +take :: Int -> NonEmptyString -> Maybe NonEmptyString +take i nes = + let + s = fromNonEmptyString nes + in + if i < 1 + then Nothing + else Just (toNonEmptyString (CU.take i s)) + +-- | Returns the last `n` characters of the string. Returns `Nothing` if `n` is +-- | less than 1. +-- | +-- | ```purescript +-- | takeRight 5 (NonEmptyString "Hello World") == Just (NonEmptyString "World") +-- | takeRight 0 (NonEmptyString "Hello World") == Nothing +-- | ``` +takeRight :: Int -> NonEmptyString -> Maybe NonEmptyString +takeRight i nes = + let + s = fromNonEmptyString nes + in + if i < 1 + then Nothing + else Just (toNonEmptyString (CU.takeRight i s)) + +-- | Returns the longest prefix of characters that satisfy the predicate. +-- | `Nothing` is returned if there is no matching prefix.
+-- | +-- | ```purescript +-- | takeWhile (_ /= ':') (NonEmptyString "http://purescript.org") == Just (NonEmptyString "http") +-- | takeWhile (_ == 'a') (NonEmptyString "xyz") == Nothing +-- | ``` +takeWhile :: (Char -> Boolean) -> NonEmptyString -> Maybe NonEmptyString +takeWhile f = fromString <<< liftS (CU.takeWhile f) + +-- | Returns the string without the first `n` characters. Returns `Nothing` if +-- | more characters are dropped than the string is long. +-- | +-- | ```purescript +-- | drop 6 (NonEmptyString "Hello World") == Just (NonEmptyString "World") +-- | drop 20 (NonEmptyString "Hello World") == Nothing +-- | ``` +drop :: Int -> NonEmptyString -> Maybe NonEmptyString +drop i nes = + let + s = fromNonEmptyString nes + in + if i >= CU.length s + then Nothing + else Just (toNonEmptyString (CU.drop i s)) + +-- | Returns the string without the last `n` characters. Returns `Nothing` if +-- | more characters are dropped than the string is long. +-- | +-- | ```purescript +-- | dropRight 6 (NonEmptyString "Hello World") == Just (NonEmptyString "Hello") +-- | dropRight 20 (NonEmptyString "Hello World") == Nothing +-- | ``` +dropRight :: Int -> NonEmptyString -> Maybe NonEmptyString +dropRight i nes = + let + s = fromNonEmptyString nes + in + if i >= CU.length s + then Nothing + else Just (toNonEmptyString (CU.dropRight i s)) + +-- | Returns the suffix remaining after `takeWhile`. +-- | +-- | ```purescript +-- | dropWhile (_ /= '.') (NonEmptyString "Test.purs") == Just (NonEmptyString ".purs") +-- | ``` +dropWhile :: (Char -> Boolean) -> NonEmptyString -> Maybe NonEmptyString +dropWhile f = fromString <<< liftS (CU.dropWhile f) + +-- | Returns the number of contiguous characters at the beginning of the string +-- | for which the predicate holds. 
+-- | +-- | ```purescript +-- | countPrefix (_ /= 'o') (NonEmptyString "Hello World") == 4 +-- | ``` +countPrefix :: (Char -> Boolean) -> NonEmptyString -> Int +countPrefix = liftS <<< CU.countPrefix + +-- | Returns the substrings of a split at the given index. A side that would +-- | be empty is returned as `Nothing`. +-- | +-- | ```purescript +-- | splitAt 2 (NonEmptyString "Hello World") == { before: Just (NonEmptyString "He"), after: Just (NonEmptyString "llo World") } +-- | splitAt 10 (NonEmptyString "Hi") == { before: Just (NonEmptyString "Hi"), after: Nothing } +-- | ``` +splitAt + :: Int + -> NonEmptyString + -> { before :: Maybe NonEmptyString, after :: Maybe NonEmptyString } +splitAt i nes = + case CU.splitAt i (fromNonEmptyString nes) of + { before, after } -> { before: fromString before, after: fromString after } diff --git a/src/Data/String/NonEmpty/Internal.purs b/src/Data/String/NonEmpty/Internal.purs new file mode 100644 index 0000000..0b12623 --- /dev/null +++ b/src/Data/String/NonEmpty/Internal.purs @@ -0,0 +1,257 @@ +-- | While most of the code in this module is safe, this module does +-- | export a few partial functions and the `NonEmptyString` constructor. +-- | While the partial functions are obvious from the `Partial` constraint in +-- | their type signature, the `NonEmptyString` constructor can be overlooked +-- | when searching for issues in one's code. See the constructor's +-- | documentation for more information. +module Data.String.NonEmpty.Internal where + +import Prelude + +import Data.Foldable (class Foldable) +import Data.Foldable as F +import Data.Maybe (Maybe(..), fromJust) +import Data.Semigroup.Foldable (class Foldable1) +import Data.String as String +import Data.String.Pattern (Pattern) +import Data.Symbol (class IsSymbol, reflectSymbol) +import Prim.TypeError as TE +import Type.Proxy (Proxy) +import Unsafe.Coerce (unsafeCoerce) + +-- | A string that is known not to be empty.
+-- | +-- | You can use this constructor to create a `NonEmptyString` that isn't +-- | non-empty, breaking the guarantee behind this newtype. It is +-- | provided as an escape hatch mainly for the `Data.String.NonEmpty.CodeUnits` +-- | and `Data.String.NonEmpty.CodePoints` modules. Use this at your own risk +-- | when you know what you are doing. +newtype NonEmptyString = NonEmptyString String + +derive newtype instance eqNonEmptyString ∷ Eq NonEmptyString +derive newtype instance ordNonEmptyString ∷ Ord NonEmptyString +derive newtype instance semigroupNonEmptyString ∷ Semigroup NonEmptyString + +instance showNonEmptyString :: Show NonEmptyString where + show (NonEmptyString s) = "(NonEmptyString.unsafeFromString " <> show s <> ")" + +-- | A helper class for defining non-empty string values at compile time. +-- | +-- | ```purescript +-- | something :: NonEmptyString +-- | something = nes (Proxy :: Proxy "something") +-- | ``` +class MakeNonEmpty (s :: Symbol) where + nes :: Proxy s -> NonEmptyString + +instance makeNonEmptyBad :: TE.Fail (TE.Text "Cannot create an NonEmptyString from an empty Symbol") => MakeNonEmpty "" where + nes _ = NonEmptyString "" + +else instance nonEmptyNonEmpty :: IsSymbol s => MakeNonEmpty s where + nes p = NonEmptyString (reflectSymbol p) + +-- | A newtype used in cases to specify a non-empty replacement for a pattern. +newtype NonEmptyReplacement = NonEmptyReplacement NonEmptyString + +derive newtype instance eqNonEmptyReplacement :: Eq NonEmptyReplacement +derive newtype instance ordNonEmptyReplacement :: Ord NonEmptyReplacement +derive newtype instance semigroupNonEmptyReplacement ∷ Semigroup NonEmptyReplacement + +instance showNonEmptyReplacement :: Show NonEmptyReplacement where + show (NonEmptyReplacement s) = "(NonEmptyReplacement " <> show s <> ")" + +-- | Creates a `NonEmptyString` from a `String`, returning `Nothing` if the +-- | input is empty.
+-- | +-- | ```purescript +-- | fromString "" = Nothing +-- | fromString "hello" = Just (NES.unsafeFromString "hello") +-- | ``` +fromString :: String -> Maybe NonEmptyString +fromString = case _ of + "" -> Nothing + s -> Just (NonEmptyString s) + +-- | A partial version of `fromString`. +unsafeFromString :: Partial => String -> NonEmptyString +unsafeFromString = fromJust <<< fromString + +-- | Converts a `NonEmptyString` back into a standard `String`. +toString :: NonEmptyString -> String +toString (NonEmptyString s) = s + +-- | Appends a string to this non-empty string. Since one of the strings is +-- | non-empty we know the result will be too. +-- | +-- | ```purescript +-- | appendString (NonEmptyString "Hello") " world" == NonEmptyString "Hello world" +-- | appendString (NonEmptyString "Hello") "" == NonEmptyString "Hello" +-- | ``` +appendString :: NonEmptyString -> String -> NonEmptyString +appendString (NonEmptyString s1) s2 = NonEmptyString (s1 <> s2) + +-- | Prepends a string to this non-empty string. Since one of the strings is +-- | non-empty we know the result will be too. +-- | +-- | ```purescript +-- | prependString "be" (NonEmptyString "fore") == NonEmptyString "before" +-- | prependString "" (NonEmptyString "fore") == NonEmptyString "fore" +-- | ``` +prependString :: String -> NonEmptyString -> NonEmptyString +prependString s1 (NonEmptyString s2) = NonEmptyString (s1 <> s2) + +-- | If the string starts with the given prefix, return the portion of the +-- | string left after removing it. If the prefix does not match or there is no +-- | remainder, the result will be `Nothing`. 
+-- | +-- | ```purescript +-- | stripPrefix (Pattern "http:") (NonEmptyString "http://purescript.org") == Just (NonEmptyString "//purescript.org") +-- | stripPrefix (Pattern "http:") (NonEmptyString "https://purescript.org") == Nothing +-- | stripPrefix (Pattern "Hello!") (NonEmptyString "Hello!") == Nothing +-- | ``` +stripPrefix :: Pattern -> NonEmptyString -> Maybe NonEmptyString +stripPrefix pat = fromString <=< liftS (String.stripPrefix pat) + +-- | If the string ends with the given suffix, return the portion of the +-- | string left after removing it. If the suffix does not match or there is no +-- | remainder, the result will be `Nothing`. +-- | +-- | ```purescript +-- | stripSuffix (Pattern ".exe") (NonEmptyString "purs.exe") == Just (NonEmptyString "purs") +-- | stripSuffix (Pattern ".exe") (NonEmptyString "purs") == Nothing +-- | stripSuffix (Pattern "Hello!") (NonEmptyString "Hello!") == Nothing +-- | ``` +stripSuffix :: Pattern -> NonEmptyString -> Maybe NonEmptyString +stripSuffix pat = fromString <=< liftS (String.stripSuffix pat) + + +-- | Checks whether the given string starts with the pattern. +-- | +-- | **NOTE**: if you also want to get the string stripped of the pattern, see +-- | `stripPrefix`. +-- | +-- | ```purescript +-- | startsWith (Pattern "foo") (NonEmptyString "foobar") == true +-- | startsWith (Pattern "bar") (NonEmptyString "foobar") == false +-- | ``` +startsWith :: Pattern -> NonEmptyString -> Boolean +startsWith = liftS <<< String.startsWith + +-- | Checks whether the given string ends with the pattern. +-- | +-- | **NOTE**: if you also want to get the string stripped of the pattern, see +-- | `stripSuffix`. +-- | +-- | ```purescript +-- | endsWith (Pattern "bar") (NonEmptyString "foobar") == true +-- | endsWith (Pattern "foo") (NonEmptyString "foobar") == false +-- | ``` +endsWith :: Pattern -> NonEmptyString -> Boolean +endsWith = liftS <<< String.endsWith + +-- | Checks whether the pattern appears in the given string. 
+-- | +-- | ```purescript +-- | contains (Pattern "needle") (NonEmptyString "haystack with needle") == true +-- | contains (Pattern "needle") (NonEmptyString "haystack") == false +-- | ``` +contains :: Pattern -> NonEmptyString -> Boolean +contains = liftS <<< String.contains + +-- | Compare two strings in a locale-aware fashion. This is in contrast to +-- | the `Ord` instance on `String` which treats strings as arrays of code +-- | units: +-- | +-- | ```purescript +-- | NonEmptyString "ä" `localeCompare` NonEmptyString "b" == LT +-- | NonEmptyString "ä" `compare` NonEmptyString "b" == GT +-- | ``` +localeCompare :: NonEmptyString -> NonEmptyString -> Ordering +localeCompare (NonEmptyString a) (NonEmptyString b) = String.localeCompare a b + +-- | Replaces the first occurrence of the pattern with the replacement string. +-- | +-- | ```purescript +-- | replace (Pattern "<=") (NonEmptyReplacement "≤") (NonEmptyString "a <= b <= c") == NonEmptyString "a ≤ b <= c" +-- | ``` +replace :: Pattern -> NonEmptyReplacement -> NonEmptyString -> NonEmptyString +replace pat (NonEmptyReplacement (NonEmptyString rep)) (NonEmptyString s) = + NonEmptyString (String.replace pat (String.Replacement rep) s) + +-- | Replaces all occurrences of the pattern with the replacement string. +-- | +-- | ```purescript +-- | replaceAll (Pattern "<=") (NonEmptyReplacement "≤") (NonEmptyString "a <= b <= c") == NonEmptyString "a ≤ b ≤ c" +-- | ``` +replaceAll :: Pattern -> NonEmptyReplacement -> NonEmptyString -> NonEmptyString +replaceAll pat (NonEmptyReplacement (NonEmptyString rep)) (NonEmptyString s) = + NonEmptyString (String.replaceAll pat (String.Replacement rep) s) + +-- | Returns the argument converted to lowercase. +-- | +-- | ```purescript +-- | toLower (NonEmptyString "hElLo") == NonEmptyString "hello" +-- | ``` +toLower :: NonEmptyString -> NonEmptyString +toLower (NonEmptyString s) = NonEmptyString (String.toLower s) + +-- | Returns the argument converted to uppercase.
+-- | +-- | ```purescript +-- | toUpper (NonEmptyString "Hello") == NonEmptyString "HELLO" +-- | ``` +toUpper :: NonEmptyString -> NonEmptyString +toUpper (NonEmptyString s) = NonEmptyString (String.toUpper s) + +-- | Removes whitespace from the beginning and end of a string, including +-- | [whitespace characters](http://www.ecma-international.org/ecma-262/5.1/#sec-7.2) +-- | and [line terminators](http://www.ecma-international.org/ecma-262/5.1/#sec-7.3). +-- | If the string is entirely made up of whitespace the result will be Nothing. +-- | +-- | ```purescript +-- | trim (NonEmptyString " Hello \n World\n\t ") == Just (NonEmptyString "Hello \n World") +-- | trim (NonEmptyString " \n") == Nothing +-- | ``` +trim :: NonEmptyString -> Maybe NonEmptyString +trim (NonEmptyString s) = fromString (String.trim s) + +-- | Joins the strings in a container together as a new string, inserting the +-- | first argument as separator between them. The result is not guaranteed to +-- | be non-empty. +-- | +-- | ```purescript +-- | joinWith ", " [NonEmptyString "apple", NonEmptyString "banana"] == "apple, banana" +-- | joinWith ", " [] == "" +-- | ``` +joinWith :: forall f. Foldable f => String -> f NonEmptyString -> String +joinWith splice = F.intercalate splice <<< coe + where + coe :: f NonEmptyString -> f String + coe = unsafeCoerce + +-- | Joins non-empty strings in a non-empty container together as a new +-- | non-empty string, inserting a possibly empty string as separator between +-- | them. The result is guaranteed to be non-empty. +-- | +-- | ```purescript +-- | -- array syntax is used for demonstration here, it would need to be a real `Foldable1` +-- | join1With ", " [NonEmptyString "apple", NonEmptyString "banana"] == NonEmptyString "apple, banana" +-- | join1With "" [NonEmptyString "apple", NonEmptyString "banana"] == NonEmptyString "applebanana" +-- | ``` +join1With :: forall f. 
Foldable1 f => String -> f NonEmptyString -> NonEmptyString +join1With splice = NonEmptyString <<< joinWith splice + +-- | Joins possibly empty strings in a non-empty container together as a new +-- | non-empty string, inserting a non-empty string as a separator between them. +-- | The result is guaranteed to be non-empty. +-- | +-- | ```purescript +-- | -- array syntax is used for demonstration here, it would need to be a real `Foldable1` +-- | joinWith1 (NonEmptyString ", ") ["apple", "banana"] == NonEmptyString "apple, banana" +-- | joinWith1 (NonEmptyString "/") ["a", "b", "", "c", ""] == NonEmptyString "a/b//c/" +-- | ``` +joinWith1 :: forall f. Foldable1 f => NonEmptyString -> f String -> NonEmptyString +joinWith1 (NonEmptyString splice) = NonEmptyString <<< F.intercalate splice + +liftS :: forall r. (String -> r) -> NonEmptyString -> r +liftS f (NonEmptyString s) = f s diff --git a/src/Data/String/Pattern.purs b/src/Data/String/Pattern.purs new file mode 100644 index 0000000..e0aea96 --- /dev/null +++ b/src/Data/String/Pattern.purs @@ -0,0 +1,33 @@ +module Data.String.Pattern where + +import Prelude + +import Data.Newtype (class Newtype) + +-- | A newtype used in cases where there is a string to be matched. +-- | +-- | ```purescript +-- | pursPattern = Pattern ".purs" +-- | --can be used like this: +-- | contains pursPattern "Test.purs" +-- | == true +-- | ``` +-- | +newtype Pattern = Pattern String + +derive instance eqPattern :: Eq Pattern +derive instance ordPattern :: Ord Pattern +derive instance newtypePattern :: Newtype Pattern _ + +instance showPattern :: Show Pattern where + show (Pattern s) = "(Pattern " <> show s <> ")" + +-- | A newtype used in cases to specify a replacement for a pattern. 
+newtype Replacement = Replacement String + +derive instance eqReplacement :: Eq Replacement +derive instance ordReplacement :: Ord Replacement +derive instance newtypeReplacement :: Newtype Replacement _ + +instance showReplacement :: Show Replacement where + show (Replacement s) = "(Replacement " <> show s <> ")" diff --git a/src/Data/String/Regex.js b/src/Data/String/Regex.js new file mode 100644 index 0000000..3196034 --- /dev/null +++ b/src/Data/String/Regex.js @@ -0,0 +1,103 @@ +export const showRegexImpl = function (r) { + return "" + r; +}; + +export const regexImpl = function (left) { + return function (right) { + return function (s1) { + return function (s2) { + try { + return right(new RegExp(s1, s2)); + } catch (e) { + return left(e.message); + } + }; + }; + }; +}; + +export const source = function (r) { + return r.source; +}; + +export const flagsImpl = function (r) { + return { + multiline: r.multiline, + ignoreCase: r.ignoreCase, + global: r.global, + dotAll: r.dotAll, + sticky: !!r.sticky, + unicode: !!r.unicode + }; +}; + +export const test = function (r) { + return function (s) { + var lastIndex = r.lastIndex; + var result = r.test(s); + r.lastIndex = lastIndex; + return result; + }; +}; + +export const _match = function (just) { + return function (nothing) { + return function (r) { + return function (s) { + var m = s.match(r); + if (m == null || m.length === 0) { + return nothing; + } else { + for (var i = 0; i < m.length; i++) { + m[i] = m[i] == null ? 
nothing : just(m[i]); + } + return just(m); + } + }; + }; + }; +}; + +export const replace = function (r) { + return function (s1) { + return function (s2) { + return s2.replace(r, s1); + }; + }; +}; + +export const _replaceBy = function (just) { + return function (nothing) { + return function (r) { + return function (f) { + return function (s) { + return s.replace(r, function (match) { + var groups = []; + var group, i = 1; + while (typeof (group = arguments[i++]) !== "number") { + groups.push(group == null ? nothing : just(group)); + } + return f(match)(groups); + }); + }; + }; + }; + }; +}; + +export const _search = function (just) { + return function (nothing) { + return function (r) { + return function (s) { + var result = s.search(r); + return result === -1 ? nothing : just(result); + }; + }; + }; +}; + +export const split = function (r) { + return function (s) { + return s.split(r); + }; +}; diff --git a/src/Data/String/Regex.purs b/src/Data/String/Regex.purs index 225f1b6..aae56e1 100644 --- a/src/Data/String/Regex.purs +++ b/src/Data/String/Regex.purs @@ -1,150 +1,131 @@ -module Data.String.Regex ( - Regex(..), - RegexFlags(..), - regex, - source, - flags, - renderFlags, - parseFlags, - test, - match, - replace, - replace', - search, - split +-- | Wraps Javascript's `RegExp` object that enables matching strings with +-- | patterns defined by regular expressions. +-- | For details of the underlying implementation, see [RegExp Reference at MDN](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp). +module Data.String.Regex + ( Regex(..) 
+ , regex + , source + , flags + , renderFlags + , parseFlags + , test + , match + , replace + , replace' + , search + , split ) where -import Data.Function -import Data.Maybe -import Data.String (indexOf) +import Prelude -foreign import data Regex :: * +import Data.Array.NonEmpty (NonEmptyArray) +import Data.Either (Either(..)) +import Data.Maybe (Maybe(..)) +import Data.String (contains) +import Data.String.Pattern (Pattern(..)) +import Data.String.Regex.Flags (RegexFlags(..), RegexFlagsRec) -foreign import showRegex' - """ - function showRegex$prime(r) { - return '' + r; - } - """ :: Regex -> String +-- | Wraps Javascript `RegExp` objects. +foreign import data Regex :: Type + +foreign import showRegexImpl :: Regex -> String instance showRegex :: Show Regex where - show = showRegex' - -type RegexFlags = - { global :: Boolean - , ignoreCase :: Boolean - , multiline :: Boolean - , sticky :: Boolean - , unicode :: Boolean - } + show = showRegexImpl -foreign import regex' - """ - function regex$prime(s1) { - return function(s2) { - return new RegExp(s1, s2); - }; - } - """ :: String -> String -> Regex +foreign import regexImpl + :: (String -> Either String Regex) + -> (Regex -> Either String Regex) + -> String + -> String + -> Either String Regex -regex :: String -> RegexFlags -> Regex -regex source flags = regex' source $ renderFlags flags +-- | Constructs a `Regex` from a pattern string and flags. Fails with +-- | `Left error` if the pattern contains a syntax error. +regex :: String -> RegexFlags -> Either String Regex +regex s f = regexImpl Left Right s $ renderFlags f -foreign import source - """ - function source(r) { - return r.source; - } - """ :: Regex -> String - -foreign import flags - """ - function flags(r) { - return { - multiline: r.multiline, - ignoreCase: r.ignoreCase, - global: r.global, - sticky: !!r.sticky, - unicode: !!r.unicode - }; - } - """ :: Regex -> RegexFlags +-- | Returns the pattern string used to construct the given `Regex`. 
+foreign import source :: Regex -> String -renderFlags :: RegexFlags -> String -renderFlags flags = - (if flags.global then "g" else "") ++ - (if flags.ignoreCase then "i" else "") ++ - (if flags.multiline then "m" else "") ++ - (if flags.sticky then "y" else "") ++ - (if flags.unicode then "u" else "") +-- | Returns the `RegexFlags` used to construct the given `Regex`. +flags :: Regex -> RegexFlags +flags = RegexFlags <<< flagsImpl + +-- | Returns the `RegexFlags` inner record used to construct the given `Regex`. +foreign import flagsImpl :: Regex -> RegexFlagsRec +-- | Returns the string representation of the given `RegexFlags`. +renderFlags :: RegexFlags -> String +renderFlags (RegexFlags f) = + (if f.global then "g" else "") <> + (if f.ignoreCase then "i" else "") <> + (if f.multiline then "m" else "") <> + (if f.dotAll then "s" else "") <> + (if f.sticky then "y" else "") <> + (if f.unicode then "u" else "") + +-- | Parses the string representation of `RegexFlags`. parseFlags :: String -> RegexFlags -parseFlags s = - { global: indexOf "g" s >= 0 - , ignoreCase: indexOf "i" s >= 0 - , multiline: indexOf "m" s >= 0 - , sticky: indexOf "y" s >= 0 - , unicode: indexOf "u" s >= 0 +parseFlags s = RegexFlags + { global: contains (Pattern "g") s + , ignoreCase: contains (Pattern "i") s + , multiline: contains (Pattern "m") s + , dotAll: contains (Pattern "s") s + , sticky: contains (Pattern "y") s + , unicode: contains (Pattern "u") s } -foreign import test - """ - function test(r) { - return function(s) { - return r.test(s); - }; - } - """ :: Regex -> String -> Boolean +-- | Returns `true` if the `Regex` matches the string. In contrast to +-- | `RegExp.prototype.test()` in JavaScript, `test` does not affect +-- | the `lastIndex` property of the Regex. +foreign import test :: Regex -> String -> Boolean foreign import _match - """ - function _match(r, s, Just, Nothing) { - var m = s.match(r); - return m == null ? Nothing : Just(m); - } - """ :: forall r. 
Fn4 Regex String ([String] -> r) r r - -match :: Regex -> String -> Maybe [String] -match r s = runFn4 _match r s Just Nothing - -foreign import replace - """ - function replace(r) { - return function(s1) { - return function(s2) { - return s2.replace(r, s1); - }; - }; - } - """ :: Regex -> String -> String -> String - -foreign import replace' - """ - function replace$prime(r) { - return function(f) { - return function(s2) { - return s2.replace(r, function(match) { - return f(match)(Array.prototype.splice.call(arguments, 1, arguments.length - 3)); - }); - }; - }; - } - """ :: Regex -> (String -> [String] -> String) -> String -> String - -foreign import search - """ - function search(r) { - return function(s) { - return s.search(r); - }; - } - """ :: Regex -> String -> Number - -foreign import split - """ - function split(r) { - return function(s) { - return s.split(r); - }; - } - """ :: Regex -> String -> [String] + :: (forall r. r -> Maybe r) + -> (forall r. Maybe r) + -> Regex + -> String + -> Maybe (NonEmptyArray (Maybe String)) + +-- | Matches the string against the `Regex` and returns an array of matches +-- | if there were any. Each match has type `Maybe String`, where `Nothing` +-- | represents an unmatched optional capturing group. +-- | See [reference](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match). +match :: Regex -> String -> Maybe (NonEmptyArray (Maybe String)) +match = _match Just Nothing + +-- | Replaces occurrences of the `Regex` with the first string. The replacement +-- | string can include special replacement patterns escaped with `"$"`. +-- | See [reference](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace). +foreign import replace :: Regex -> String -> String -> String + +foreign import _replaceBy + :: (forall r. r -> Maybe r) + -> (forall r. 
Maybe r) + -> Regex + -> (String -> Array (Maybe String) -> String) + -> String + -> String + +-- | Transforms occurrences of the `Regex` using a function of the matched +-- | substring and a list of captured substrings of type `Maybe String`, +-- | where `Nothing` represents an unmatched optional capturing group. +-- | See the [reference](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#Specifying_a_function_as_a_parameter). +replace' :: Regex -> (String -> Array (Maybe String) -> String) -> String -> String +replace' = _replaceBy Just Nothing + +foreign import _search + :: (forall r. r -> Maybe r) + -> (forall r. Maybe r) + -> Regex + -> String + -> Maybe Int + +-- | Returns `Just` the index of the first match of the `Regex` in the string, +-- | or `Nothing` if there is no match. +search :: Regex -> String -> Maybe Int +search = _search Just Nothing + +-- | Split the string into an array of substrings along occurrences of the `Regex`. +foreign import split :: Regex -> String -> Array String diff --git a/src/Data/String/Regex/Flags.purs b/src/Data/String/Regex/Flags.purs new file mode 100644 index 0000000..6d7dd71 --- /dev/null +++ b/src/Data/String/Regex/Flags.purs @@ -0,0 +1,129 @@ +module Data.String.Regex.Flags where + +import Prelude + +import Control.MonadPlus (guard) +import Data.Newtype (class Newtype) +import Data.String (joinWith) + +type RegexFlagsRec = + { global :: Boolean + , ignoreCase :: Boolean + , multiline :: Boolean + , dotAll :: Boolean + , sticky :: Boolean + , unicode :: Boolean + } + +-- | Flags that control matching. +newtype RegexFlags = RegexFlags RegexFlagsRec + +derive instance newtypeRegexFlags :: Newtype RegexFlags _ + +-- | All flags set to false. 
+noFlags :: RegexFlags +noFlags = RegexFlags + { global: false + , ignoreCase: false + , multiline: false + , dotAll: false + , sticky: false + , unicode: false + } + +-- | Only global flag set to true +global :: RegexFlags +global = RegexFlags + { global: true + , ignoreCase: false + , multiline: false + , dotAll: false + , sticky: false + , unicode: false + } + +-- | Only ignoreCase flag set to true +ignoreCase :: RegexFlags +ignoreCase = RegexFlags + { global: false + , ignoreCase: true + , multiline: false + , dotAll: false + , sticky: false + , unicode: false + } + +-- | Only multiline flag set to true +multiline :: RegexFlags +multiline = RegexFlags + { global: false + , ignoreCase: false + , multiline: true + , dotAll: false + , sticky: false + , unicode: false + } + +-- | Only sticky flag set to true +sticky :: RegexFlags +sticky = RegexFlags + { global: false + , ignoreCase: false + , multiline: false + , dotAll: false + , sticky: true + , unicode: false + } + +-- | Only unicode flag set to true +unicode :: RegexFlags +unicode = RegexFlags + { global: false + , ignoreCase: false + , multiline: false + , dotAll: false + , sticky: false + , unicode: true + } + +-- | Only dotAll flag set to true +dotAll :: RegexFlags +dotAll = RegexFlags + { global: false + , ignoreCase: false + , multiline: false + , dotAll: true + , sticky: false + , unicode: false + } + +instance semigroupRegexFlags :: Semigroup RegexFlags where + append (RegexFlags x) (RegexFlags y) = RegexFlags + { global: x.global || y.global + , ignoreCase: x.ignoreCase || y.ignoreCase + , multiline: x.multiline || y.multiline + , dotAll: x.dotAll || y.dotAll + , sticky: x.sticky || y.sticky + , unicode: x.unicode || y.unicode + } + +instance monoidRegexFlags :: Monoid RegexFlags where + mempty = noFlags + +derive newtype instance eqRegexFlags :: Eq RegexFlags + +instance showRegexFlags :: Show RegexFlags where + show (RegexFlags flags) = + let + usedFlags = + [] + <> (guard flags.global $> "global") + 
<> (guard flags.ignoreCase $> "ignoreCase") + <> (guard flags.multiline $> "multiline") + <> (guard flags.dotAll $> "dotAll") + <> (guard flags.sticky $> "sticky") + <> (guard flags.unicode $> "unicode") + in + if usedFlags == [] + then "noFlags" + else "(" <> joinWith " <> " usedFlags <> ")" diff --git a/src/Data/String/Regex/Unsafe.purs b/src/Data/String/Regex/Unsafe.purs new file mode 100644 index 0000000..8afd1a2 --- /dev/null +++ b/src/Data/String/Regex/Unsafe.purs @@ -0,0 +1,14 @@ +module Data.String.Regex.Unsafe + ( unsafeRegex + ) where + +import Control.Category (identity) +import Data.Either (either) +import Data.String.Regex (Regex, regex) +import Data.String.Regex.Flags (RegexFlags) +import Partial.Unsafe (unsafeCrashWith) + +-- | Constructs a `Regex` from a pattern string and flags. Fails with +-- | an exception if the pattern contains a syntax error. +unsafeRegex :: String -> RegexFlags -> Regex +unsafeRegex s f = either unsafeCrashWith identity (regex s f) diff --git a/src/Data/String/Unsafe.js b/src/Data/String/Unsafe.js new file mode 100644 index 0000000..75772aa --- /dev/null +++ b/src/Data/String/Unsafe.js @@ -0,0 +1,11 @@ +export const charAt = function (i) { + return function (s) { + if (i >= 0 && i < s.length) return s.charAt(i); + throw new Error("Data.String.Unsafe.charAt: Invalid index."); + }; +}; + +export const char = function (s) { + if (s.length === 1) return s.charAt(0); + throw new Error("Data.String.Unsafe.char: Expected string of length 1."); +}; diff --git a/src/Data/String/Unsafe.purs b/src/Data/String/Unsafe.purs index 20e54dc..75f5037 100644 --- a/src/Data/String/Unsafe.purs +++ b/src/Data/String/Unsafe.purs @@ -1,24 +1,15 @@ +-- | Unsafe string and character functions. module Data.String.Unsafe - ( charAt - , charCodeAt + ( char + , charAt ) where - import Data.Char +-- | Returns the character at the given index. +-- | +-- | **Unsafe:** throws runtime exception if the index is out of bounds. 
+foreign import charAt :: Int -> String -> Char - foreign import charCodeAt - """ - function charCodeAt(i) { - return function(s) { - return s.charCodeAt(i); - }; - } - """ :: Number -> String -> Number - - foreign import charAt - """ - function charAt(i) { - return function(s) { - return s.charAt(i); - }; - } - """ :: Number -> String -> Char +-- | Converts a string of length `1` to a character. +-- | +-- | **Unsafe:** throws runtime exception if length is not `1`. +foreign import char :: String -> Char diff --git a/test/Test/Data/String.purs b/test/Test/Data/String.purs new file mode 100644 index 0000000..5c73153 --- /dev/null +++ b/test/Test/Data/String.purs @@ -0,0 +1,133 @@ +module Test.Data.String (testString) where + +import Prelude + +import Data.Maybe (Maybe(..)) +import Data.String as S +import Data.String.Pattern (Pattern(..), Replacement(..)) +import Effect (Effect) +import Effect.Console (log) +import Test.Assert (assert, assertEqual) + +testString :: Effect Unit +testString = do + + log "null" + assert $ S.null "" + assert $ not (S.null "a") + + log "stripPrefix" + -- this is a re-export from Data.String.CodeUnits, so the majority of tests are in there + assertEqual + { actual: S.stripPrefix (Pattern "𝕒𝕓𝕔") "𝕒𝕓𝕔𝕕𝕖" + , expected: Just "𝕕𝕖" + } + + log "stripSuffix" + -- this is a re-export from Data.String.CodeUnits, so the majority of tests are in there + assertEqual + { actual: S.stripSuffix (Pattern "𝕔𝕕𝕖") "𝕒𝕓𝕔𝕕𝕖" + , expected: Just "𝕒𝕓" + } + + log "contains" + assert $ S.contains (Pattern "") "" + assert $ S.contains (Pattern "") "abcd" + assert $ S.contains (Pattern "bc") "abcd" + assert $ not S.contains (Pattern "cb") "abcd" + + log "localeCompare" + assertEqual + { actual: S.localeCompare "" "" + , expected: EQ + } + assertEqual + { actual: S.localeCompare "a" "a" + , expected: EQ + } + assertEqual + { actual: S.localeCompare "a" "b" + , expected: LT + } + assertEqual + { actual: S.localeCompare "b" "a" + , expected: GT + } + + log "replace" + 
assertEqual + { actual: S.replace (Pattern "b") (Replacement "") "abc" + , expected: "ac" + } + assertEqual + { actual: S.replace (Pattern "b") (Replacement "!") "abc" + , expected: "a!c" + } + assertEqual + { actual: S.replace (Pattern "d") (Replacement "!") "abc" + , expected: "abc" + } + + log "replaceAll" + assertEqual + { actual: S.replaceAll (Pattern "b") (Replacement "") "abbbbbc" + , expected: "ac" + } + assertEqual + { actual: S.replaceAll (Pattern "[b]") (Replacement "!") "a[b]c" + , expected: "a!c" + } + + log "split" + assertEqual + { actual: S.split (Pattern "") "" + , expected: [] + } + assertEqual + { actual: S.split (Pattern "") "a" + , expected: ["a"] + } + assertEqual + { actual: S.split (Pattern "") "ab" + , expected: ["a", "b"] + } + assertEqual + { actual: S.split (Pattern "b") "aabcc" + , expected: ["aa", "cc"] + } + assertEqual + { actual: S.split (Pattern "d") "abc" + , expected: ["abc"] + } + + log "toLower" + assertEqual + { actual: S.toLower "bAtMaN" + , expected: "batman" + } + + log "toUpper" + assertEqual + { actual: S.toUpper "bAtMaN" + , expected: "BATMAN" + } + + log "trim" + assertEqual + { actual: S.trim " abc " + , expected: "abc" + } + + log "joinWith" + assertEqual + { actual: S.joinWith "" [] + , expected: "" + } + assertEqual + { actual: S.joinWith "" ["a", "b"] + , expected: "ab" + } + assertEqual + { actual: S.joinWith "--" ["a", "b", "c"] + , expected: "a--b--c" + } diff --git a/test/Test/Data/String/CaseInsensitive.purs b/test/Test/Data/String/CaseInsensitive.purs new file mode 100644 index 0000000..a263732 --- /dev/null +++ b/test/Test/Data/String/CaseInsensitive.purs @@ -0,0 +1,22 @@ +module Test.Data.String.CaseInsensitive (testCaseInsensitiveString) where + +import Prelude + +import Data.String.CaseInsensitive (CaseInsensitiveString(..)) +import Effect (Effect) +import Effect.Console (log) +import Test.Assert (assertEqual) + +testCaseInsensitiveString :: Effect Unit +testCaseInsensitiveString = do + log "equality" + 
assertEqual + { actual: CaseInsensitiveString "aB" + , expected: CaseInsensitiveString "AB" + } + + log "comparison" + assertEqual + { actual: compare (CaseInsensitiveString "qwerty") (CaseInsensitiveString "QWERTY") + , expected: EQ + } diff --git a/test/Test/Data/String/CodePoints.purs b/test/Test/Data/String/CodePoints.purs new file mode 100644 index 0000000..587ec89 --- /dev/null +++ b/test/Test/Data/String/CodePoints.purs @@ -0,0 +1,653 @@ +module Test.Data.String.CodePoints (testStringCodePoints) where + +import Prelude + +import Data.Enum (fromEnum, toEnum) +import Data.Maybe (Maybe(..), fromJust) +import Data.String.CodePoints as SCP +import Data.String.Pattern (Pattern(..)) +import Effect (Effect) +import Effect.Console (log) +import Partial.Unsafe (unsafePartial) +import Test.Assert (assertEqual) + +str :: String +str = "a\xDC00\xD800\xD800\x16805\x16A06z" + +testStringCodePoints :: Effect Unit +testStringCodePoints = do + + log "show" + assertEqual + { actual: map show (SCP.codePointAt 0 str) + , expected: Just "(CodePoint 0x61)" + } + assertEqual + { actual: map show (SCP.codePointAt 1 str) + , expected: Just "(CodePoint 0xDC00)" + } + assertEqual + { actual: map show (SCP.codePointAt 2 str) + , expected: Just "(CodePoint 0xD800)" + } + assertEqual + { actual: map show (SCP.codePointAt 3 str) + , expected: Just "(CodePoint 0xD800)" + } + assertEqual + { actual: map show (SCP.codePointAt 4 str) + , expected: Just "(CodePoint 0x16805)" + } + assertEqual + { actual: map show (SCP.codePointAt 5 str) + , expected: Just "(CodePoint 0x16A06)" + } + assertEqual + { actual: map show (SCP.codePointAt 6 str) + , expected: Just "(CodePoint 0x7A)" + } + + log "codePointFromChar" + assertEqual + { actual: Just (SCP.codePointFromChar 'A') + , expected: (toEnum 65) + } + assertEqual + { actual: (SCP.codePointFromChar <$> toEnum 0) + , expected: toEnum 0 + } + assertEqual + { actual: (SCP.codePointFromChar <$> toEnum 0xFFFF) + , expected: toEnum 0xFFFF + } + + log 
"singleton" + assertEqual + { actual: (SCP.singleton <$> toEnum 0x30) + , expected: Just "0" + } + assertEqual + { actual: (SCP.singleton <$> toEnum 0x16805) + , expected: Just "\x16805" + } + + log "codePointAt" + assertEqual + { actual: SCP.codePointAt (-1) str + , expected: Nothing + } + assertEqual + { actual: SCP.codePointAt 0 str + , expected: (toEnum 0x61) + } + assertEqual + { actual: SCP.codePointAt 1 str + , expected: (toEnum 0xDC00) + } + assertEqual + { actual: SCP.codePointAt 2 str + , expected: (toEnum 0xD800) + } + assertEqual + { actual: SCP.codePointAt 3 str + , expected: (toEnum 0xD800) + } + assertEqual + { actual: SCP.codePointAt 4 str + , expected: (toEnum 0x16805) + } + assertEqual + { actual: SCP.codePointAt 5 str + , expected: (toEnum 0x16A06) + } + assertEqual + { actual: SCP.codePointAt 6 str + , expected: (toEnum 0x7A) + } + assertEqual + { actual: SCP.codePointAt 7 str + , expected: Nothing + } + + log "uncons" + assertEqual + { actual: SCP.uncons str + , expected: Just {head: cp 0x61, tail: "\xDC00\xD800\xD800\x16805\x16A06z"} + } + assertEqual + { actual: SCP.uncons (SCP.drop 1 str) + , expected: Just {head: cp 0xDC00, tail: "\xD800\xD800\x16805\x16A06z"} + } + assertEqual + { actual: SCP.uncons (SCP.drop 2 str) + , expected: Just {head: cp 0xD800, tail: "\xD800\x16805\x16A06z"} + } + assertEqual + { actual: SCP.uncons (SCP.drop 3 str) + , expected: Just {head: cp 0xD800, tail: "\x16805\x16A06z"} + } + assertEqual + { actual: SCP.uncons (SCP.drop 4 str) + , expected: Just {head: cp 0x16805, tail: "\x16A06z"} + } + assertEqual + { actual: SCP.uncons (SCP.drop 5 str) + , expected: Just {head: cp 0x16A06, tail: "z"} + } + assertEqual + { actual: SCP.uncons (SCP.drop 6 str) + , expected: Just {head: cp 0x7A, tail: ""} + } + assertEqual + { actual: SCP.uncons "" + , expected: Nothing + } + + log "length" + assertEqual + { actual: SCP.length "" + , expected: 0 + } + assertEqual + { actual: SCP.length "a" + , expected: 1 + } + assertEqual + { 
actual: SCP.length "ab" + , expected: 2 + } + assertEqual + { actual: SCP.length str + , expected: 7 + } + + log "countPrefix" + assertEqual + { actual: SCP.countPrefix (\_ -> true) "" + , expected: 0 + } + assertEqual + { actual: SCP.countPrefix (\_ -> false) str + , expected: 0 + } + assertEqual + { actual: SCP.countPrefix (\_ -> true) str + , expected: 7 + } + assertEqual + { actual: SCP.countPrefix (\x -> fromEnum x < 0xFFFF) str + , expected: 4 + } + assertEqual + { actual: SCP.countPrefix (\x -> fromEnum x < 0xDC00) str + , expected: 1 + } + + log "indexOf" + assertEqual + { actual: SCP.indexOf (Pattern "") "" + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf (Pattern "") str + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf (Pattern str) str + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf (Pattern "a") str + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf (Pattern "\xDC00\xD800\xD800") str + , expected: Just 1 + } + assertEqual + { actual: SCP.indexOf (Pattern "\xD800") str + , expected: Just 2 + } + assertEqual + { actual: SCP.indexOf (Pattern "\xD800\xD800") str + , expected: Just 2 + } + assertEqual + { actual: SCP.indexOf (Pattern "\xD800\xD81A") str + , expected: Just 3 + } + assertEqual + { actual: SCP.indexOf (Pattern "\xD800\x16805") str + , expected: Just 3 + } + assertEqual + { actual: SCP.indexOf (Pattern "\x16805") str + , expected: Just 4 + } + assertEqual + { actual: SCP.indexOf (Pattern "\x16A06") str + , expected: Just 5 + } + assertEqual + { actual: SCP.indexOf (Pattern "z") str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf (Pattern "\n") str + , expected: Nothing + } + assertEqual + { actual: SCP.indexOf (Pattern "\xD81A") str + , expected: Just 4 + } + + log "indexOf'" + assertEqual + { actual: SCP.indexOf' (Pattern "") 0 "" + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf' (Pattern str) 0 str + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf' 
(Pattern str) 1 str + , expected: Nothing + } + assertEqual + { actual: SCP.indexOf' (Pattern "a") 0 str + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf' (Pattern "a") 1 str + , expected: Nothing + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 0 str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 1 str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 2 str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 3 str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 4 str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 5 str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 6 str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 7 str + , expected: Nothing + } + + log "lastIndexOf" + assertEqual + { actual: SCP.lastIndexOf (Pattern "") "" + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "") str + , expected: Just 7 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern str) str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "a") str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\xDC00\xD800\xD800") str + , expected: Just 1 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\xD800") str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\xD800\xD800") str + , expected: Just 2 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\xD800\xD81A") str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\xD800\x16805") str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\x16805") str + , expected: Just 4 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\x16A06") str + , expected: Just 5 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "z") str + , expected: Just 6 + } + 
assertEqual + { actual: SCP.lastIndexOf (Pattern "\n") str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\xD81A") str + , expected: Just 5 + } + + log "lastIndexOf'" + assertEqual + { actual: SCP.lastIndexOf' (Pattern "") 0 "" + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern str) 0 str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern str) 1 str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "a") (-1) str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "a") 0 str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "a") 7 str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "a") (SCP.length str) str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 0 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 1 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 2 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 3 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 4 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 5 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 6 str + , expected: Just 6 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 7 str + , expected: Just 6 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 7 str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 6 str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 5 str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 4 str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 3 str + , expected: Just 
3 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 2 str + , expected: Just 2 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 1 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 0 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\x16A06") 7 str + , expected: Just 5 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\x16A06") 6 str + , expected: Just 5 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\x16A06") 5 str + , expected: Just 5 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\x16A06") 4 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\x16A06") 3 str + , expected: Nothing + } + + log "take" + assertEqual + { actual: SCP.take (-1) str + , expected: "" + } + assertEqual + { actual: SCP.take 0 str + , expected: "" + } + assertEqual + { actual: SCP.take 1 str + , expected: "a" + } + assertEqual + { actual: SCP.take 2 str + , expected: "a\xDC00" + } + assertEqual + { actual: SCP.take 3 str + , expected: "a\xDC00\xD800" + } + assertEqual + { actual: SCP.take 4 str + , expected: "a\xDC00\xD800\xD800" + } + assertEqual + { actual: SCP.take 5 str + , expected: "a\xDC00\xD800\xD800\x16805" + } + assertEqual + { actual: SCP.take 6 str + , expected: "a\xDC00\xD800\xD800\x16805\x16A06" + } + assertEqual + { actual: SCP.take 7 str + , expected: str + } + assertEqual + { actual: SCP.take 8 str + , expected: str + } + + log "takeWhile" + assertEqual + { actual: SCP.takeWhile (\_ -> true) str + , expected: str + } + assertEqual + { actual: SCP.takeWhile (\_ -> false) str + , expected: "" + } + assertEqual + { actual: SCP.takeWhile (\c -> fromEnum c < 0xFFFF) str + , expected: "a\xDC00\xD800\xD800" + } + assertEqual + { actual: SCP.takeWhile (\c -> fromEnum c < 0xDC00) str + , expected: "a" + } + + log "drop" + assertEqual + { actual: SCP.drop (-1) str + , expected: str + } + assertEqual + { actual: SCP.drop 0 
str + , expected: str + } + assertEqual + { actual: SCP.drop 1 str + , expected: "\xDC00\xD800\xD800\x16805\x16A06z" + } + assertEqual + { actual: SCP.drop 2 str + , expected: "\xD800\xD800\x16805\x16A06z" + } + assertEqual + { actual: SCP.drop 3 str + , expected: "\xD800\x16805\x16A06z" + } + assertEqual + { actual: SCP.drop 4 str + , expected: "\x16805\x16A06z" + } + assertEqual + { actual: SCP.drop 5 str + , expected: "\x16A06z" + } + assertEqual + { actual: SCP.drop 6 str + , expected: "z" + } + assertEqual + { actual: SCP.drop 7 str + , expected: "" + } + assertEqual + { actual: SCP.drop 8 str + , expected: "" + } + + log "dropWhile" + assertEqual + { actual: SCP.dropWhile (\_ -> true) str + , expected: "" + } + assertEqual + { actual: SCP.dropWhile (\_ -> false) str + , expected: str + } + assertEqual + { actual: SCP.dropWhile (\c -> fromEnum c < 0xFFFF) str + , expected: "\x16805\x16A06z" + } + assertEqual + { actual: SCP.dropWhile (\c -> fromEnum c < 0xDC00) str + , expected: "\xDC00\xD800\xD800\x16805\x16A06z" + } + + log "splitAt" + assertEqual + { actual: SCP.splitAt 0 "" + , expected: {before: "", after: "" } + } + assertEqual + { actual: SCP.splitAt 1 "" + , expected: {before: "", after: "" } + } + assertEqual + { actual: SCP.splitAt 0 "a" + , expected: {before: "", after: "a"} + } + assertEqual + { actual: SCP.splitAt 1 "ab" + , expected: {before: "a", after: "b"} + } + assertEqual + { actual: SCP.splitAt 3 "aabcc" + , expected: {before: "aab", after: "cc"} + } + assertEqual + { actual: SCP.splitAt (-1) "abc" + , expected: {before: "", after: "abc"} + } + assertEqual + { actual: SCP.splitAt 0 str + , expected: {before: "", after: str} + } + assertEqual + { actual: SCP.splitAt 1 str + , expected: {before: "a", after: "\xDC00\xD800\xD800\x16805\x16A06z"} + } + assertEqual + { actual: SCP.splitAt 2 str + , expected: {before: "a\xDC00", after: "\xD800\xD800\x16805\x16A06z"} + } + assertEqual + { actual: SCP.splitAt 3 str + , expected: {before: 
"a\xDC00\xD800", after: "\xD800\x16805\x16A06z"} + } + assertEqual + { actual: SCP.splitAt 4 str + , expected: {before: "a\xDC00\xD800\xD800", after: "\x16805\x16A06z"} + } + assertEqual + { actual: SCP.splitAt 5 str + , expected: {before: "a\xDC00\xD800\xD800\x16805", after: "\x16A06z"} + } + assertEqual + { actual: SCP.splitAt 6 str + , expected: {before: "a\xDC00\xD800\xD800\x16805\x16A06", after: "z"} + } + assertEqual + { actual: SCP.splitAt 7 str + , expected: {before: str, after: ""} + } + assertEqual + { actual: SCP.splitAt 8 str + , expected: {before: str, after: ""} + } + +cp :: Int -> SCP.CodePoint +cp = unsafePartial fromJust <<< toEnum diff --git a/test/Test/Data/String/CodeUnits.purs b/test/Test/Data/String/CodeUnits.purs new file mode 100644 index 0000000..30bf100 --- /dev/null +++ b/test/Test/Data/String/CodeUnits.purs @@ -0,0 +1,530 @@ +module Test.Data.String.CodeUnits (testStringCodeUnits) where + +import Prelude + +import Data.Enum (fromEnum) +import Data.Maybe (Maybe(..), isNothing) +import Data.String.CodeUnits as SCU +import Data.String.Pattern (Pattern(..)) +import Effect (Effect) +import Effect.Console (log) +import Test.Assert (assert, assertEqual) + +testStringCodeUnits :: Effect Unit +testStringCodeUnits = do + log "stripPrefix" + assertEqual + { actual: SCU.stripPrefix (Pattern "abc") "abcde" + , expected: Just "de" + } + assertEqual + { actual: SCU.stripPrefix (Pattern "xyz") "abcde" + , expected: Nothing + } + assertEqual + { actual: SCU.stripPrefix (Pattern "abcd") "ab" + , expected: Nothing + } + assertEqual + { actual: SCU.stripPrefix (Pattern "abc") "abc" + , expected: Just "" + } + assertEqual + { actual: SCU.stripPrefix (Pattern "") "abc" + , expected: Just "abc" + } + assertEqual + { actual: SCU.stripPrefix (Pattern "") "" + , expected: Just "" + } + + log "stripSuffix" + assertEqual + { actual: SCU.stripSuffix (Pattern "cde") "abcde" + , expected: Just "ab" + } + assertEqual + { actual: SCU.stripSuffix (Pattern "xyz") "abcde" 
+ , expected: Nothing + } + assertEqual + { actual: SCU.stripSuffix (Pattern "abcd") "cd" + , expected: Nothing + } + assertEqual + { actual: SCU.stripSuffix (Pattern "abc") "abc" + , expected: Just "" + } + assertEqual + { actual: SCU.stripSuffix (Pattern "") "abc" + , expected: Just "abc" + } + assertEqual + { actual: SCU.stripSuffix (Pattern "") "" + , expected: Just "" + } + + log "startsWith" + assert $ SCU.startsWith (Pattern "foo") "foobar" + assert $ SCU.startsWith (Pattern "foo") "foo" + assert $ SCU.startsWith (Pattern "") "" + assert $ SCU.startsWith (Pattern "") "foo" + assert $ not $ SCU.startsWith (Pattern "foo") "" + + log "endsWith" + assert $ SCU.endsWith (Pattern "bar") "foobar" + assert $ SCU.endsWith (Pattern "bar") "bar" + assert $ SCU.endsWith (Pattern "") "" + assert $ SCU.endsWith (Pattern "") "bar" + assert $ not $ SCU.endsWith (Pattern "bar") "" + + log "charAt" + assertEqual + { actual: SCU.charAt 0 "" + , expected: Nothing + } + assertEqual + { actual: SCU.charAt 0 "a" + , expected: Just 'a' + } + assertEqual + { actual: SCU.charAt 1 "a" + , expected: Nothing + } + assertEqual + { actual: SCU.charAt 0 "ab" + , expected: Just 'a' + } + assertEqual + { actual: SCU.charAt 1 "ab" + , expected: Just 'b' + } + assertEqual + { actual: SCU.charAt 2 "ab" + , expected: Nothing + } + + log "singleton" + assertEqual + { actual: SCU.singleton 'a' + , expected: "a" + } + + log "charCodeAt" + assertEqual + { actual: (fromEnum <$> SCU.charAt 0 "") + , expected: Nothing + } + assertEqual + { actual: (fromEnum <$> SCU.charAt 0 "a") + , expected: Just 97 + } + assertEqual + { actual: (fromEnum <$> SCU.charAt 1 "a") + , expected: Nothing + } + assertEqual + { actual: (fromEnum <$> SCU.charAt 0 "ab") + , expected: Just 97 + } + assertEqual + { actual: (fromEnum <$> SCU.charAt 1 "ab") + , expected: Just 98 + } + assertEqual + { actual: (fromEnum <$> SCU.charAt 2 "ab") + , expected: Nothing + } + + log "toChar" + assertEqual + { actual: SCU.toChar "" + , 
expected: Nothing + } + assertEqual + { actual: SCU.toChar "a" + , expected: Just 'a' + } + assertEqual + { actual: SCU.toChar "ab" + , expected: Nothing + } + + log "uncons" + assert $ isNothing (SCU.uncons "") + assertEqual + { actual: SCU.uncons "a" + , expected: Just { head: 'a', tail: "" } + } + assertEqual + { actual: SCU.uncons "ab" + , expected: Just { head: 'a', tail: "b" } + } + + log "takeWhile" + assertEqual + { actual: SCU.takeWhile (\c -> true) "abc" + , expected: "abc" + } + assertEqual + { actual: SCU.takeWhile (\c -> false) "abc" + , expected: "" + } + assertEqual + { actual: SCU.takeWhile (\c -> c /= 'b') "aabbcc" + , expected: "aa" + } + + log "dropWhile" + assertEqual + { actual: SCU.dropWhile (\c -> true) "abc" + , expected: "" + } + assertEqual + { actual: SCU.dropWhile (\c -> false) "abc" + , expected: "abc" + } + assertEqual + { actual: SCU.dropWhile (\c -> c /= 'b') "aabbcc" + , expected: "bbcc" + } + + log "fromCharArray" + assertEqual + { actual: SCU.fromCharArray [] + , expected: "" + } + assertEqual + { actual: SCU.fromCharArray ['a', 'b'] + , expected: "ab" + } + + log "indexOf" + assertEqual + { actual: SCU.indexOf (Pattern "") "" + , expected: Just 0 + } + assertEqual + { actual: SCU.indexOf (Pattern "") "abcd" + , expected: Just 0 + } + assertEqual + { actual: SCU.indexOf (Pattern "bc") "abcd" + , expected: Just 1 + } + assertEqual + { actual: SCU.indexOf (Pattern "cb") "abcd" + , expected: Nothing + } + + log "indexOf'" + assertEqual + { actual: SCU.indexOf' (Pattern "") 0 "" + , expected: Just 0 + } + assertEqual + { actual: SCU.indexOf' (Pattern "") (-1) "ab" + , expected: Nothing + } + assertEqual + { actual: SCU.indexOf' (Pattern "") 0 "ab" + , expected: Just 0 + } + assertEqual + { actual: SCU.indexOf' (Pattern "") 1 "ab" + , expected: Just 1 + } + assertEqual + { actual: SCU.indexOf' (Pattern "") 2 "ab" + , expected: Just 2 + } + assertEqual + { actual: SCU.indexOf' (Pattern "") 3 "ab" + , expected: Nothing + } + assertEqual 
+ { actual: SCU.indexOf' (Pattern "bc") 0 "abcd" + , expected: Just 1 + } + assertEqual + { actual: SCU.indexOf' (Pattern "bc") 1 "abcd" + , expected: Just 1 + } + assertEqual + { actual: SCU.indexOf' (Pattern "bc") 2 "abcd" + , expected: Nothing + } + assertEqual + { actual: SCU.indexOf' (Pattern "cb") 0 "abcd" + , expected: Nothing + } + + log "lastIndexOf" + assertEqual + { actual: SCU.lastIndexOf (Pattern "") "" + , expected: Just 0 + } + assertEqual + { actual: SCU.lastIndexOf (Pattern "") "abcd" + , expected: Just 4 + } + assertEqual + { actual: SCU.lastIndexOf (Pattern "bc") "abcd" + , expected: Just 1 + } + assertEqual + { actual: SCU.lastIndexOf (Pattern "cb") "abcd" + , expected: Nothing + } + + log "lastIndexOf'" + assertEqual + { actual: SCU.lastIndexOf' (Pattern "") 0 "" + , expected: Just 0 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "") (-1) "ab" + , expected: Just 0 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "") 0 "ab" + , expected: Just 0 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "") 1 "ab" + , expected: Just 1 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "") 2 "ab" + , expected: Just 2 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "") 3 "ab" + , expected: Just 2 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "bc") 0 "abcd" + , expected: Nothing + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "bc") 1 "abcd" + , expected: Just 1 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "bc") 2 "abcd" + , expected: Just 1 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "cb") 0 "abcd" + , expected: Nothing + } + + log "length" + assertEqual + { actual: SCU.length "" + , expected: 0 + } + assertEqual + { actual: SCU.length "a" + , expected: 1 + } + assertEqual + { actual: SCU.length "ab" + , expected: 2 + } + + log "take" + assertEqual + { actual: SCU.take 0 "ab" + , expected: "" + } + assertEqual + { actual: SCU.take 1 "ab" + , expected: "a" + } + assertEqual + { 
actual: SCU.take 2 "ab" + , expected: "ab" + } + assertEqual + { actual: SCU.take 3 "ab" + , expected: "ab" + } + assertEqual + { actual: SCU.take (-1) "ab" + , expected: "" + } + + log "takeRight" + assertEqual + { actual: SCU.takeRight 0 "ab" + , expected: "" + } + assertEqual + { actual: SCU.takeRight 1 "ab" + , expected: "b" + } + assertEqual + { actual: SCU.takeRight 2 "ab" + , expected: "ab" + } + assertEqual + { actual: SCU.takeRight 3 "ab" + , expected: "ab" + } + assertEqual + { actual: SCU.takeRight (-1) "ab" + , expected: "" + } + + log "drop" + assertEqual + { actual: SCU.drop 0 "ab" + , expected: "ab" + } + assertEqual + { actual: SCU.drop 1 "ab" + , expected: "b" + } + assertEqual + { actual: SCU.drop 2 "ab" + , expected: "" + } + assertEqual + { actual: SCU.drop 3 "ab" + , expected: "" + } + assertEqual + { actual: SCU.drop (-1) "ab" + , expected: "ab" + } + + log "dropRight" + assertEqual + { actual: SCU.dropRight 0 "ab" + , expected: "ab" + } + assertEqual + { actual: SCU.dropRight 1 "ab" + , expected: "a" + } + assertEqual + { actual: SCU.dropRight 2 "ab" + , expected: "" + } + assertEqual + { actual: SCU.dropRight 3 "ab" + , expected: "" + } + assertEqual + { actual: SCU.dropRight (-1) "ab" + , expected: "ab" + } + + log "countPrefix" + assertEqual + { actual: SCU.countPrefix (_ == 'a') "" + , expected: 0 + } + assertEqual + { actual: SCU.countPrefix (_ == 'a') "ab" + , expected: 1 + } + assertEqual + { actual: SCU.countPrefix (_ == 'a') "aaab" + , expected: 3 + } + assertEqual + { actual: SCU.countPrefix (_ == 'a') "abaa" + , expected: 1 + } + + log "splitAt" + assertEqual + { actual: SCU.splitAt 1 "" + , expected: {before: "", after: ""} + } + assertEqual + { actual: SCU.splitAt 0 "a" + , expected: {before: "", after: "a"} + } + assertEqual + { actual: SCU.splitAt 1 "a" + , expected: {before: "a", after: ""} + } + assertEqual + { actual: SCU.splitAt 1 "ab" + , expected: {before: "a", after: "b"} + } + assertEqual + { actual: SCU.splitAt 3 
"aabcc" + , expected: {before: "aab", after: "cc"} + } + assertEqual + { actual: SCU.splitAt (-1) "abc" + , expected: {before: "", after: "abc"} + } + assertEqual + { actual: SCU.splitAt 10 "Hi" + , expected: {before: "Hi", after: ""} + } + + log "toCharArray" + assertEqual + { actual: SCU.toCharArray "" + , expected: [] + } + assertEqual + { actual: SCU.toCharArray "a" + , expected: ['a'] + } + assertEqual + { actual: SCU.toCharArray "ab" + , expected: ['a', 'b'] + } + + log "slice" + assertEqual + { actual: SCU.slice 0 0 "purescript" + , expected: "" + } + assertEqual + { actual: SCU.slice 0 1 "purescript" + , expected: "p" + } + assertEqual + { actual: SCU.slice 3 6 "purescript" + , expected: "esc" + } + assertEqual + { actual: SCU.slice 3 10 "purescript" + , expected: "escript" + } + assertEqual + { actual: SCU.slice 10 10 "purescript" + , expected: "" + } + assertEqual + { actual: SCU.slice (-4) (-1) "purescript" + , expected: "rip" + } + assertEqual + { actual: SCU.slice (-4) 3 "purescript" + , expected: "" + } + assertEqual + { actual: SCU.slice 1000 3 "purescript" + , expected: "" + } + assertEqual + { actual: SCU.slice 2 (-15) "purescript" + , expected: "" + } + assertEqual + { actual: SCU.slice (-15) 9 "purescript" + , expected: "purescrip" + } + assertEqual + { actual: SCU.slice 3 1000 "purescript" + , expected: "escript" + } diff --git a/test/Test/Data/String/NonEmpty.purs b/test/Test/Data/String/NonEmpty.purs new file mode 100644 index 0000000..46f2cd0 --- /dev/null +++ b/test/Test/Data/String/NonEmpty.purs @@ -0,0 +1,233 @@ +module Test.Data.String.NonEmpty (testNonEmptyString) where + +import Prelude + +import Data.Array.NonEmpty as NEA +import Data.Maybe (Maybe(..), fromJust) +import Data.String.NonEmpty (Pattern(..), nes) +import Data.String.NonEmpty as NES +import Effect (Effect) +import Effect.Console (log) +import Partial.Unsafe (unsafePartial) +import Test.Assert (assert, assertEqual) +import Type.Proxy (Proxy(..)) + +testNonEmptyString :: 
Effect Unit +testNonEmptyString = do + + log "fromString" + assertEqual + { actual: NES.fromString "" + , expected: Nothing + } + assertEqual + { actual: NES.fromString "hello" + , expected: Just (nes (Proxy :: Proxy "hello")) + } + + log "toString" + assertEqual + { actual: (NES.toString <$> NES.fromString "hello") + , expected: Just "hello" + } + + log "appendString" + assertEqual + { actual: NES.appendString (nes (Proxy :: Proxy "Hello")) " world" + , expected: nes (Proxy :: Proxy "Hello world") + } + assertEqual + { actual: NES.appendString (nes (Proxy :: Proxy "Hello")) "" + , expected: nes (Proxy :: Proxy "Hello") + } + + log "prependString" + assertEqual + { actual: NES.prependString "be" (nes (Proxy :: Proxy "fore")) + , expected: nes (Proxy :: Proxy "before") + } + assertEqual + { actual: NES.prependString "" (nes (Proxy :: Proxy "fore")) + , expected: nes (Proxy :: Proxy "fore") + } + + log "contains" + assert $ NES.contains (Pattern "") (nes (Proxy :: Proxy "abcd")) + assert $ NES.contains (Pattern "bc") (nes (Proxy :: Proxy "abcd")) + assert $ not NES.contains (Pattern "cb") (nes (Proxy :: Proxy "abcd")) + assert $ NES.contains (Pattern "needle") (nes (Proxy :: Proxy "haystack with needle")) + assert $ not NES.contains (Pattern "needle") (nes (Proxy :: Proxy "haystack")) + + log "localeCompare" + assertEqual + { actual: NES.localeCompare (nes (Proxy :: Proxy "a")) (nes (Proxy :: Proxy "a")) + , expected: EQ + } + assertEqual + { actual: NES.localeCompare (nes (Proxy :: Proxy "a")) (nes (Proxy :: Proxy "b")) + , expected: LT + } + assertEqual + { actual: NES.localeCompare (nes (Proxy :: Proxy "b")) (nes (Proxy :: Proxy "a")) + , expected: GT + } + + log "replace" + assertEqual + { actual: NES.replace (Pattern "b") (NES.NonEmptyReplacement (nes (Proxy :: Proxy "!"))) (nes (Proxy :: Proxy "abc")) + , expected: nes (Proxy :: Proxy "a!c") + } + assertEqual + { actual: NES.replace (Pattern "b") (NES.NonEmptyReplacement (nes (Proxy :: Proxy "!"))) (nes (Proxy 
:: Proxy "abbc")) + , expected: nes (Proxy :: Proxy "a!bc") + } + assertEqual + { actual: NES.replace (Pattern "d") (NES.NonEmptyReplacement (nes (Proxy :: Proxy "!"))) (nes (Proxy :: Proxy "abc")) + , expected: nes (Proxy :: Proxy "abc") + } + + log "replaceAll" + assertEqual + { actual: NES.replaceAll (Pattern "[b]") (NES.NonEmptyReplacement (nes (Proxy :: Proxy "!"))) (nes (Proxy :: Proxy "a[b]c")) + , expected: nes (Proxy :: Proxy "a!c") + } + assertEqual + { actual: NES.replaceAll (Pattern "[b]") (NES.NonEmptyReplacement (nes (Proxy :: Proxy "!"))) (nes (Proxy :: Proxy "a[b]c[b]")) + , expected: nes (Proxy :: Proxy "a!c!") + } + assertEqual + { actual: NES.replaceAll (Pattern "x") (NES.NonEmptyReplacement (nes (Proxy :: Proxy "!"))) (nes (Proxy :: Proxy "abc")) + , expected: nes (Proxy :: Proxy "abc") + } + + log "stripPrefix" + assertEqual + { actual: NES.stripPrefix (Pattern "") (nes (Proxy :: Proxy "abc")) + , expected: Just (nes (Proxy :: Proxy "abc")) + } + assertEqual + { actual: NES.stripPrefix (Pattern "a") (nes (Proxy :: Proxy "abc")) + , expected: Just (nes (Proxy :: Proxy "bc")) + } + assertEqual + { actual: NES.stripPrefix (Pattern "abc") (nes (Proxy :: Proxy "abc")) + , expected: Nothing + } + assertEqual + { actual: NES.stripPrefix (Pattern "!") (nes (Proxy :: Proxy "abc")) + , expected: Nothing + } + assertEqual + { actual: NES.stripPrefix (Pattern "http:") (nes (Proxy :: Proxy "http://purescript.org")) + , expected: Just (nes (Proxy :: Proxy "//purescript.org")) + } + assertEqual + { actual: NES.stripPrefix (Pattern "http:") (nes (Proxy :: Proxy "https://purescript.org")) + , expected: Nothing + } + assertEqual + { actual: NES.stripPrefix (Pattern "Hello!") (nes (Proxy :: Proxy "Hello!")) + , expected: Nothing + } + + log "stripSuffix" + assertEqual + { actual: NES.stripSuffix (Pattern ".exe") (nes (Proxy :: Proxy "purs.exe")) + , expected: Just (nes (Proxy :: Proxy "purs")) + } + assertEqual + { actual: NES.stripSuffix (Pattern ".exe") (nes 
(Proxy :: Proxy "purs")) + , expected: Nothing + } + assertEqual + { actual: NES.stripSuffix (Pattern "Hello!") (nes (Proxy :: Proxy "Hello!")) + , expected: Nothing + } + + log "startsWith" + assert $ NES.startsWith (Pattern "foo") (nes (Proxy :: Proxy "foobar")) + assert $ NES.startsWith (Pattern "foo") (nes (Proxy :: Proxy "foo")) + assert $ NES.startsWith (Pattern "") (nes (Proxy :: Proxy "foo")) + assert $ not $ NES.startsWith (Pattern "foo") (nes (Proxy :: Proxy "f")) + + log "endsWith" + assert $ NES.endsWith (Pattern "bar") (nes (Proxy :: Proxy "foobar")) + assert $ NES.endsWith (Pattern "bar") (nes (Proxy :: Proxy "bar")) + assert $ NES.endsWith (Pattern "") (nes (Proxy :: Proxy "f")) + assert $ NES.endsWith (Pattern "") (nes (Proxy :: Proxy "bar")) + assert $ not $ NES.endsWith (Pattern "bar") (nes (Proxy :: Proxy "b")) + + log "toLower" + assertEqual + { actual: NES.toLower (nes (Proxy :: Proxy "bAtMaN")) + , expected: nes (Proxy :: Proxy "batman") + } + + log "toUpper" + assertEqual + { actual: NES.toUpper (nes (Proxy :: Proxy "bAtMaN")) + , expected: nes (Proxy :: Proxy "BATMAN") + } + + log "trim" + assertEqual + { actual: NES.trim (nes (Proxy :: Proxy " abc ")) + , expected: Just (nes (Proxy :: Proxy "abc")) + } + assertEqual + { actual: NES.trim (nes (Proxy :: Proxy " \n")) + , expected: Nothing + } + + log "joinWith" + assertEqual + { actual: NES.joinWith "" [] + , expected: "" + } + assertEqual + { actual: NES.joinWith "" [nes (Proxy :: Proxy "a"), nes (Proxy :: Proxy "b")] + , expected: "ab" + } + assertEqual + { actual: NES.joinWith "--" [nes (Proxy :: Proxy "a"), nes (Proxy :: Proxy "b"), nes (Proxy :: Proxy "c")] + , expected: "a--b--c" + } + + log "join1With" + assertEqual + { actual: NES.join1With "" (nea [nes (Proxy :: Proxy "a"), nes (Proxy :: Proxy "b")]) + , expected: nes (Proxy :: Proxy "ab") + } + assertEqual + { actual: NES.join1With "--" (nea [nes (Proxy :: Proxy "a"), nes (Proxy :: Proxy "b"), nes (Proxy :: Proxy "c")]) + , 
expected: nes (Proxy :: Proxy "a--b--c") + } + assertEqual + { actual: NES.join1With ", " (nea [nes (Proxy :: Proxy "apple"), nes (Proxy :: Proxy "banana")]) + , expected: nes (Proxy :: Proxy "apple, banana") + } + assertEqual + { actual: NES.join1With "" (nea [nes (Proxy :: Proxy "apple"), nes (Proxy :: Proxy "banana")]) + , expected: nes (Proxy :: Proxy "applebanana") + } + + log "joinWith1" + assertEqual + { actual: NES.joinWith1 (nes (Proxy :: Proxy " ")) (nea ["a", "b"]) + , expected: nes (Proxy :: Proxy "a b") + } + assertEqual + { actual: NES.joinWith1 (nes (Proxy :: Proxy "--")) (nea ["a", "b", "c"]) + , expected: nes (Proxy :: Proxy "a--b--c") + } + assertEqual + { actual: NES.joinWith1 (nes (Proxy :: Proxy ", ")) (nea ["apple", "banana"]) + , expected: nes (Proxy :: Proxy "apple, banana") + } + assertEqual + { actual: NES.joinWith1 (nes (Proxy :: Proxy "/")) (nea ["a", "b", "", "c", ""]) + , expected: nes (Proxy :: Proxy "a/b//c/") + } + +nea :: Array ~> NEA.NonEmptyArray +nea = unsafePartial fromJust <<< NEA.fromArray diff --git a/test/Test/Data/String/NonEmpty/CodeUnits.purs b/test/Test/Data/String/NonEmpty/CodeUnits.purs new file mode 100644 index 0000000..e810dd9 --- /dev/null +++ b/test/Test/Data/String/NonEmpty/CodeUnits.purs @@ -0,0 +1,450 @@ +module Test.Data.String.NonEmpty.CodeUnits (testNonEmptyStringCodeUnits) where + +import Prelude + +import Data.Array.NonEmpty as NEA +import Data.Enum (fromEnum) +import Data.Maybe (Maybe(..), fromJust) +import Data.String.NonEmpty (Pattern(..), nes) +import Data.String.NonEmpty.CodeUnits as NESCU +import Effect (Effect) +import Effect.Console (log) +import Partial.Unsafe (unsafePartial) +import Test.Assert (assertEqual) +import Type.Proxy (Proxy(..)) + +testNonEmptyStringCodeUnits :: Effect Unit +testNonEmptyStringCodeUnits = do + + log "fromCharArray" + assertEqual + { actual: NESCU.fromCharArray [] + , expected: Nothing + } + assertEqual + { actual: NESCU.fromCharArray ['a', 'b'] + , expected: Just (nes 
(Proxy :: Proxy "ab")) + } + + log "fromNonEmptyCharArray" + assertEqual + { actual: NESCU.fromNonEmptyCharArray (NEA.singleton 'b') + , expected: NESCU.singleton 'b' + } + + log "singleton" + assertEqual + { actual: NESCU.singleton 'a' + , expected: nes (Proxy :: Proxy "a") + } + + log "cons" + assertEqual + { actual: NESCU.cons 'a' "bc" + , expected: nes (Proxy :: Proxy "abc") + } + assertEqual + { actual: NESCU.cons 'a' "" + , expected: nes (Proxy :: Proxy "a") + } + + log "snoc" + assertEqual + { actual: NESCU.snoc 'c' "ab" + , expected: nes (Proxy :: Proxy "abc") + } + assertEqual + { actual: NESCU.snoc 'a' "" + , expected: nes (Proxy :: Proxy "a") + } + + log "fromFoldable1" + assertEqual + { actual: NESCU.fromFoldable1 (nea ['a']) + , expected: nes (Proxy :: Proxy "a") + } + assertEqual + { actual: NESCU.fromFoldable1 (nea ['a', 'b', 'c']) + , expected: nes (Proxy :: Proxy "abc") + } + + log "charAt" + assertEqual + { actual: NESCU.charAt 0 (nes (Proxy :: Proxy "a")) + , expected: Just 'a' + } + assertEqual + { actual: NESCU.charAt 1 (nes (Proxy :: Proxy "a")) + , expected: Nothing + } + assertEqual + { actual: NESCU.charAt 0 (nes (Proxy :: Proxy "ab")) + , expected: Just 'a' + } + assertEqual + { actual: NESCU.charAt 1 (nes (Proxy :: Proxy "ab")) + , expected: Just 'b' + } + assertEqual + { actual: NESCU.charAt 2 (nes (Proxy :: Proxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.charAt 2 (nes (Proxy :: Proxy "Hello")) + , expected: Just 'l' + } + assertEqual + { actual: NESCU.charAt 10 (nes (Proxy :: Proxy "Hello")) + , expected: Nothing + } + + log "charCodeAt" + assertEqual + { actual: fromEnum <$> NESCU.charAt 0 (nes (Proxy :: Proxy "a")) + , expected: Just 97 + } + assertEqual + { actual: fromEnum <$> NESCU.charAt 1 (nes (Proxy :: Proxy "a")) + , expected: Nothing + } + assertEqual + { actual: fromEnum <$> NESCU.charAt 0 (nes (Proxy :: Proxy "ab")) + , expected: Just 97 + } + assertEqual + { actual: fromEnum <$> NESCU.charAt 1 (nes 
(Proxy :: Proxy "ab")) + , expected: Just 98 + } + assertEqual + { actual: fromEnum <$> NESCU.charAt 2 (nes (Proxy :: Proxy "ab")) + , expected: Nothing + } + assertEqual + { actual: fromEnum <$> NESCU.charAt 2 (nes (Proxy :: Proxy "5 €")) + , expected: Just 0x20AC + } + assertEqual + { actual: fromEnum <$> NESCU.charAt 10 (nes (Proxy :: Proxy "5 €")) + , expected: Nothing + } + + log "toChar" + assertEqual + { actual: NESCU.toChar (nes (Proxy :: Proxy "a")) + , expected: Just 'a' + } + assertEqual + { actual: NESCU.toChar (nes (Proxy :: Proxy "ab")) + , expected: Nothing + } + + log "toCharArray" + assertEqual + { actual: NESCU.toCharArray (nes (Proxy :: Proxy "a")) + , expected: ['a'] + } + assertEqual + { actual: NESCU.toCharArray (nes (Proxy :: Proxy "ab")) + , expected: ['a', 'b'] + } + assertEqual + { actual: NESCU.toCharArray (nes (Proxy :: Proxy "Hello☺\n")) + , expected: ['H','e','l','l','o','☺','\n'] + } + + log "toNonEmptyCharArray" + assertEqual + { actual: NESCU.toNonEmptyCharArray (nes (Proxy :: Proxy "ab")) + , expected: nea ['a', 'b'] + } + + log "uncons" + assertEqual + { actual: NESCU.uncons (nes (Proxy :: Proxy "a")) + , expected: { head: 'a', tail: Nothing } + } + assertEqual + { actual: NESCU.uncons (nes (Proxy :: Proxy "Hello World")) + , expected: { head: 'H', tail: Just (nes (Proxy :: Proxy "ello World")) } + } + + log "takeWhile" + assertEqual + { actual: NESCU.takeWhile (\_ -> true) (nes (Proxy :: Proxy "abc")) + , expected: Just (nes (Proxy :: Proxy "abc")) + } + assertEqual + { actual: NESCU.takeWhile (\_ -> false) (nes (Proxy :: Proxy "abc")) + , expected: Nothing + } + assertEqual + { actual: NESCU.takeWhile (\c -> c /= 'b') (nes (Proxy :: Proxy "aabbcc")) + , expected: Just (nes (Proxy :: Proxy "aa")) + } + assertEqual + { actual: NESCU.takeWhile (_ /= ':') (nes (Proxy :: Proxy "http://purescript.org")) + , expected: Just (nes (Proxy :: Proxy "http")) + } + assertEqual + { actual: NESCU.takeWhile (_ == 'a') (nes (Proxy :: Proxy 
"xyz")) + , expected: Nothing + } + + log "dropWhile" + assertEqual + { actual: NESCU.dropWhile (\_ -> true) (nes (Proxy :: Proxy "abc")) + , expected: Nothing + } + assertEqual + { actual: NESCU.dropWhile (\_ -> false) (nes (Proxy :: Proxy "abc")) + , expected: Just (nes (Proxy :: Proxy "abc")) + } + assertEqual + { actual: NESCU.dropWhile (\c -> c /= 'b') (nes (Proxy :: Proxy "aabbcc")) + , expected: Just (nes (Proxy :: Proxy "bbcc")) + } + assertEqual + { actual: NESCU.dropWhile (_ /= '.') (nes (Proxy :: Proxy "Test.purs")) + , expected: Just (nes (Proxy :: Proxy ".purs")) + } + + log "indexOf" + assertEqual + { actual: NESCU.indexOf (Pattern "") (nes (Proxy :: Proxy "abcd")) + , expected: Just 0 + } + assertEqual + { actual: NESCU.indexOf (Pattern "bc") (nes (Proxy :: Proxy "abcd")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.indexOf (Pattern "cb") (nes (Proxy :: Proxy "abcd")) + , expected: Nothing + } + + log "indexOf'" + assertEqual + { actual: NESCU.indexOf' (Pattern "") (-1) (nes (Proxy :: Proxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.indexOf' (Pattern "") 0 (nes (Proxy :: Proxy "ab")) + , expected: Just 0 + } + assertEqual + { actual: NESCU.indexOf' (Pattern "") 1 (nes (Proxy :: Proxy "ab")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.indexOf' (Pattern "") 2 (nes (Proxy :: Proxy "ab")) + , expected: Just 2 + } + assertEqual + { actual: NESCU.indexOf' (Pattern "") 3 (nes (Proxy :: Proxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.indexOf' (Pattern "bc") 0 (nes (Proxy :: Proxy "abcd")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.indexOf' (Pattern "bc") 1 (nes (Proxy :: Proxy "abcd")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.indexOf' (Pattern "bc") 2 (nes (Proxy :: Proxy "abcd")) + , expected: Nothing + } + assertEqual + { actual: NESCU.indexOf' (Pattern "cb") 0 (nes (Proxy :: Proxy "abcd")) + , expected: Nothing + } + + log "lastIndexOf" + assertEqual + { 
actual: NESCU.lastIndexOf (Pattern "") (nes (Proxy :: Proxy "abcd")) + , expected: Just 4 + } + assertEqual + { actual: NESCU.lastIndexOf (Pattern "bc") (nes (Proxy :: Proxy "abcd")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.lastIndexOf (Pattern "cb") (nes (Proxy :: Proxy "abcd")) + , expected: Nothing + } + + log "lastIndexOf'" + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "") (-1) (nes (Proxy :: Proxy "ab")) + , expected: Just 0 + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "") 0 (nes (Proxy :: Proxy "ab")) + , expected: Just 0 + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "") 1 (nes (Proxy :: Proxy "ab")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "") 2 (nes (Proxy :: Proxy "ab")) + , expected: Just 2 + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "") 3 (nes (Proxy :: Proxy "ab")) + , expected: Just 2 + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "bc") 0 (nes (Proxy :: Proxy "abcd")) + , expected: Nothing + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "bc") 1 (nes (Proxy :: Proxy "abcd")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "bc") 2 (nes (Proxy :: Proxy "abcd")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "cb") 0 (nes (Proxy :: Proxy "abcd")) + , expected: Nothing + } + + log "length" + assertEqual + { actual: NESCU.length (nes (Proxy :: Proxy "a")) + , expected: 1 + } + assertEqual + { actual: NESCU.length (nes (Proxy :: Proxy "ab")) + , expected: 2 + } + + log "take" + assertEqual + { actual: NESCU.take 0 (nes (Proxy :: Proxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.take 1 (nes (Proxy :: Proxy "ab")) + , expected: Just (nes (Proxy :: Proxy "a")) + } + assertEqual + { actual: NESCU.take 2 (nes (Proxy :: Proxy "ab")) + , expected: Just (nes (Proxy :: Proxy "ab")) + } + assertEqual + { actual: NESCU.take 3 (nes (Proxy :: Proxy "ab")) + , expected: 
Just (nes (Proxy :: Proxy "ab")) + } + assertEqual + { actual: NESCU.take (-1) (nes (Proxy :: Proxy "ab")) + , expected: Nothing + } + + log "takeRight" + assertEqual + { actual: NESCU.takeRight 0 (nes (Proxy :: Proxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.takeRight 1 (nes (Proxy :: Proxy "ab")) + , expected: Just (nes (Proxy :: Proxy "b")) + } + assertEqual + { actual: NESCU.takeRight 2 (nes (Proxy :: Proxy "ab")) + , expected: Just (nes (Proxy :: Proxy "ab")) + } + assertEqual + { actual: NESCU.takeRight 3 (nes (Proxy :: Proxy "ab")) + , expected: Just (nes (Proxy :: Proxy "ab")) + } + assertEqual + { actual: NESCU.takeRight (-1) (nes (Proxy :: Proxy "ab")) + , expected: Nothing + } + + log "drop" + assertEqual + { actual: NESCU.drop 0 (nes (Proxy :: Proxy "ab")) + , expected: Just (nes (Proxy :: Proxy "ab")) + } + assertEqual + { actual: NESCU.drop 1 (nes (Proxy :: Proxy "ab")) + , expected: Just (nes (Proxy :: Proxy "b")) + } + assertEqual + { actual: NESCU.drop 2 (nes (Proxy :: Proxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.drop 3 (nes (Proxy :: Proxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.drop (-1) (nes (Proxy :: Proxy "ab")) + , expected: Just (nes (Proxy :: Proxy "ab")) + } + + log "dropRight" + assertEqual + { actual: NESCU.dropRight 0 (nes (Proxy :: Proxy "ab")) + , expected: Just (nes (Proxy :: Proxy "ab")) + } + assertEqual + { actual: NESCU.dropRight 1 (nes (Proxy :: Proxy "ab")) + , expected: Just (nes (Proxy :: Proxy "a")) + } + assertEqual + { actual: NESCU.dropRight 2 (nes (Proxy :: Proxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.dropRight 3 (nes (Proxy :: Proxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.dropRight (-1) (nes (Proxy :: Proxy "ab")) + , expected: Just (nes (Proxy :: Proxy "ab")) + } + + log "countPrefix" + assertEqual + { actual: NESCU.countPrefix (_ == 'a') (nes (Proxy :: Proxy "ab")) + , expected: 1 + } + 
assertEqual + { actual: NESCU.countPrefix (_ == 'a') (nes (Proxy :: Proxy "aaab")) + , expected: 3 + } + assertEqual + { actual: NESCU.countPrefix (_ == 'a') (nes (Proxy :: Proxy "abaa")) + , expected: 1 + } + assertEqual + { actual: NESCU.countPrefix (_ == 'c') (nes (Proxy :: Proxy "abaa")) + , expected: 0 + } + + log "splitAt" + assertEqual + { actual: NESCU.splitAt 0 (nes (Proxy :: Proxy "a")) + , expected: { before: Nothing, after: Just (nes (Proxy :: Proxy "a")) } + } + assertEqual + { actual: NESCU.splitAt 1 (nes (Proxy :: Proxy "ab")) + , expected: { before: Just (nes (Proxy :: Proxy "a")), after: Just (nes (Proxy :: Proxy "b")) } + } + assertEqual + { actual: NESCU.splitAt 3 (nes (Proxy :: Proxy "aabcc")) + , expected: { before: Just (nes (Proxy :: Proxy "aab")), after: Just (nes (Proxy :: Proxy "cc")) } + } + assertEqual + { actual: NESCU.splitAt (-1) (nes (Proxy :: Proxy "abc")) + , expected: { before: Nothing, after: Just (nes (Proxy :: Proxy "abc")) } + } + +nea :: Array ~> NEA.NonEmptyArray +nea = unsafePartial fromJust <<< NEA.fromArray diff --git a/test/Test/Data/String/Regex.purs b/test/Test/Data/String/Regex.purs new file mode 100644 index 0000000..01d583b --- /dev/null +++ b/test/Test/Data/String/Regex.purs @@ -0,0 +1,61 @@ +module Test.Data.String.Regex (testStringRegex) where + +import Data.String.Regex + +import Data.Array.NonEmpty (NonEmptyArray, fromArray) +import Data.Either (isLeft) +import Data.Maybe (Maybe(..), fromJust) +import Data.String.Regex.Flags (dotAll, global, ignoreCase, noFlags) +import Data.String.Regex.Unsafe (unsafeRegex) +import Effect (Effect) +import Effect.Console (log) +import Partial.Unsafe (unsafePartial) +import Prelude (type (~>), Unit, discard, not, show, ($), (<<<), (<>), (==)) +import Test.Assert (assert) + +testStringRegex :: Effect Unit +testStringRegex = do + log "regex" + assert $ test (unsafeRegex "^a" noFlags) "abc" + assert $ not (test (unsafeRegex "^b" noFlags) "abc") + assert $ isLeft (regex "+" noFlags) 
+ + log "flags" + assert $ "quxbarfoobaz" == replace (unsafeRegex "foo" noFlags) "qux" "foobarfoobaz" + assert $ "quxbarquxbaz" == replace (unsafeRegex "foo" global) "qux" "foobarfoobaz" + assert $ "quxbarquxbaz" == replace (unsafeRegex "foo" (global <> ignoreCase)) "qux" "foobarFOObaz" + assert $ "quxbarfoobaz" == replace (unsafeRegex ".foo" dotAll) "qux" "\nfoobarfoobaz" + + log "match" + assert $ match (unsafeRegex "^abc$" noFlags) "abc" == Just (nea [Just "abc"]) + assert $ match (unsafeRegex "^abc$" noFlags) "xyz" == Nothing + + log "replace" + assert $ replace (unsafeRegex "-" noFlags) "!" "a-b-c" == "a!b-c" + + log "replace'" + assert $ replace' (unsafeRegex "-" noFlags) (\s xs -> "!") "a-b-c" == "a!b-c" + assert $ replace' (unsafeRegex "(foo)(bar)?" noFlags) (\s xs -> show xs) "<>" == "<>" + assert $ replace' (unsafeRegex "(foo)(bar)?" noFlags) (\s xs -> show xs) "<foo>" == "<[(Just \"foo\"),Nothing]>" + assert $ replace' (unsafeRegex "(foo)(bar)?" noFlags) (\s xs -> show xs) "<foobar>" == "<[(Just \"foo\"),(Just \"bar\")]>" + assert $ replace' (unsafeRegex "@(?<username>\\w+)" noFlags) (\s xs -> show xs) "@purescript" == "[(Just \"purescript\")]" + + log "search" + assert $ search (unsafeRegex "b" noFlags) "abc" == Just 1 + assert $ search (unsafeRegex "d" noFlags) "abc" == Nothing + + log "split" + assert $ split (unsafeRegex "" noFlags) "" == [] + assert $ split (unsafeRegex "" noFlags) "abc" == ["a", "b", "c"] + assert $ split (unsafeRegex "b" noFlags) "" == [""] + assert $ split (unsafeRegex "b" noFlags) "abc" == ["a", "c"] + + log "test" + -- Ensure that we have referential transparency for calls to 'test'. 
No + -- global state should be maintained between these two calls: + let pattern = unsafeRegex "a" (parseFlags "g") + assert $ test pattern "a" + assert $ test pattern "a" + +nea :: Array ~> NonEmptyArray +nea = unsafePartial fromJust <<< fromArray diff --git a/test/Test/Data/String/Unsafe.purs b/test/Test/Data/String/Unsafe.purs new file mode 100644 index 0000000..b6b9aca --- /dev/null +++ b/test/Test/Data/String/Unsafe.purs @@ -0,0 +1,26 @@ +module Test.Data.String.Unsafe (testStringUnsafe) where + +import Prelude + +import Data.String.Unsafe as SU +import Effect (Effect) +import Effect.Console (log) +import Test.Assert (assertEqual) + +testStringUnsafe :: Effect Unit +testStringUnsafe = do + log "charAt" + assertEqual + { actual: SU.charAt 0 "ab" + , expected: 'a' + } + assertEqual + { actual: SU.charAt 1 "ab" + , expected: 'b' + } + + log "char" + assertEqual + { actual: SU.char "a" + , expected: 'a' + } diff --git a/test/Test/Main.purs b/test/Test/Main.purs new file mode 100644 index 0000000..fb9f32e --- /dev/null +++ b/test/Test/Main.purs @@ -0,0 +1,33 @@ +module Test.Main where + +import Prelude + +import Effect (Effect) +import Effect.Console (log) +import Test.Data.String (testString) +import Test.Data.String.CaseInsensitive (testCaseInsensitiveString) +import Test.Data.String.CodePoints (testStringCodePoints) +import Test.Data.String.CodeUnits (testStringCodeUnits) +import Test.Data.String.NonEmpty (testNonEmptyString) +import Test.Data.String.NonEmpty.CodeUnits (testNonEmptyStringCodeUnits) +import Test.Data.String.Regex (testStringRegex) +import Test.Data.String.Unsafe (testStringUnsafe) + +main :: Effect Unit +main = do + log "\n--- Data.String ---\n" + testString + log "\n--- Data.String.CodePoints ---\n" + testStringCodePoints + log "\n--- Data.String.CodeUnits ---\n" + testStringCodeUnits + log "\n--- Data.String.Unsafe ---\n" + testStringUnsafe + log "\n--- Data.String.Regex ---\n" + testStringRegex + log "\n--- Data.String.CaseInsensitive ---\n" + 
testCaseInsensitiveString + log "\n--- Data.String.NonEmpty ---\n" + testNonEmptyString + log "\n--- Data.String.NonEmpty.CodeUnits ---\n" + testNonEmptyStringCodeUnits