Permalink
Cannot retrieve contributors at this time
194 lines (168 sloc)
4.99 KB
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
cpython/Python/Python-tokenize.c
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include "Python.h" | |
| #include "../Parser/tokenizer.h" | |
/* Forward declaration: needed by _tokenize_get_state_by_type() below;
   the full definition appears at the bottom of the file. */
static struct PyModuleDef _tokenizemodule;
/* Per-module state: holds the heap-allocated TokenizerIter type object. */
typedef struct {
    PyTypeObject *TokenizerIter;
} tokenize_state;
| static tokenize_state * | |
| get_tokenize_state(PyObject *module) { | |
| return (tokenize_state *)PyModule_GetState(module); | |
| } | |
/* Look up the module state starting from a type object: slot functions
   only receive the type, so the owning module must be recovered via
   PyType_GetModuleByDef() before its state can be read. */
#define _tokenize_get_state_by_type(type) \
    get_tokenize_state(PyType_GetModuleByDef(type, &_tokenizemodule))
| #include "pycore_runtime.h" | |
| #include "clinic/Python-tokenize.c.h" | |
| /*[clinic input] | |
| module _tokenizer | |
| class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_type(type)->TokenizerIter" | |
| [clinic start generated code]*/ | |
| /*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/ | |
/* Instance layout for TokenizerIter: a thin iterator wrapper around the
   internal tokenizer state. */
typedef struct
{
    PyObject_HEAD struct tok_state *tok;  /* owned; freed in tokenizeriter_dealloc() */
} tokenizeriterobject;
| /*[clinic input] | |
| @classmethod | |
| _tokenizer.tokenizeriter.__new__ as tokenizeriter_new | |
| source: str | |
| [clinic start generated code]*/ | |
| static PyObject * | |
| tokenizeriter_new_impl(PyTypeObject *type, const char *source) | |
| /*[clinic end generated code: output=7fd9f46cf9263cbb input=4384b368407375c6]*/ | |
| { | |
| tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0); | |
| if (self == NULL) { | |
| return NULL; | |
| } | |
| PyObject *filename = PyUnicode_FromString("<string>"); | |
| if (filename == NULL) { | |
| return NULL; | |
| } | |
| self->tok = _PyTokenizer_FromUTF8(source, 1); | |
| if (self->tok == NULL) { | |
| Py_DECREF(filename); | |
| return NULL; | |
| } | |
| self->tok->filename = filename; | |
| return (PyObject *)self; | |
| } | |
/* tp_iternext: produce the next token from the embedded tokenizer.
   Returns a new reference to a 7-tuple
   (string, type, lineno, end_lineno, col_offset, end_col_offset, line),
   or NULL with StopIteration set at end of input / on a tokenizer error. */
static PyObject *
tokenizeriter_next(tokenizeriterobject *it)
{
    struct token token;
    int type = _PyTokenizer_Get(it->tok, &token);
    /* ERRORTOKEN with a live exception: the tokenizer already raised;
       propagate it as-is. */
    if (type == ERRORTOKEN && PyErr_Occurred()) {
        return NULL;
    }
    /* ERRORTOKEN without an exception, or a normal ENDMARKER, both end
       the iteration. */
    if (type == ERRORTOKEN || type == ENDMARKER) {
        PyErr_SetString(PyExc_StopIteration, "EOF");
        return NULL;
    }
    /* Token text; tokens without recorded bounds become "". */
    PyObject *str = NULL;
    if (token.start == NULL || token.end == NULL) {
        str = PyUnicode_FromString("");
    }
    else {
        str = PyUnicode_FromStringAndSize(token.start, token.end - token.start);
    }
    if (str == NULL) {
        return NULL;
    }
    /* Decode everything read into the buffer so far as the `line` element;
       "replace" keeps malformed UTF-8 from raising here. */
    Py_ssize_t size = it->tok->inp - it->tok->buf;
    PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace");
    if (line == NULL) {
        Py_DECREF(str);
        return NULL;
    }
    /* A multi-line STRING starts on an earlier line than the one the
       tokenizer currently points at, so use its recorded start position. */
    const char *line_start = type == STRING ? it->tok->multi_line_start : it->tok->line_start;
    int lineno = type == STRING ? it->tok->first_lineno : it->tok->lineno;
    int end_lineno = it->tok->lineno;
    /* Column offsets are byte distances into the UTF-8 buffer; -1 means
       "unknown" (bounds missing, or the token starts before line_start). */
    int col_offset = -1;
    int end_col_offset = -1;
    if (token.start != NULL && token.start >= line_start) {
        col_offset = (int)(token.start - line_start);
    }
    if (token.end != NULL && token.end >= it->tok->line_start) {
        end_col_offset = (int)(token.end - it->tok->line_start);
    }
    /* "N" steals the references to str and line, so no DECREF on success.
       NOTE(review): if Py_BuildValue itself fails, the stolen-reference
       arguments may leak -- confirm against the C API docs. */
    return Py_BuildValue("(NiiiiiN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line);
}
/* tp_dealloc: release the tokenizer state (which also releases the
   filename it owns), free the instance memory, then drop the reference
   the instance holds on its heap type. The order matters: `tp` must be
   fetched before the instance is freed, and DECREF'd only afterwards. */
static void
tokenizeriter_dealloc(tokenizeriterobject *it)
{
    PyTypeObject *tp = Py_TYPE(it);
    _PyTokenizer_Free(it->tok);
    tp->tp_free(it);
    Py_DECREF(tp);
}
/* Slot table for the TokenizerIter heap type: construction, destruction,
   generic attribute access, and the iterator protocol (self-iter + next). */
static PyType_Slot tokenizeriter_slots[] = {
    {Py_tp_new, tokenizeriter_new},
    {Py_tp_dealloc, tokenizeriter_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, tokenizeriter_next},
    {0, NULL},
};
/* Spec used to create the TokenizerIter heap type; IMMUTABLETYPE keeps
   Python code from monkey-patching the type after creation. */
static PyType_Spec tokenizeriter_spec = {
    .name = "_tokenize.TokenizerIter",
    .basicsize = sizeof(tokenizeriterobject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = tokenizeriter_slots,
};
| static int | |
| tokenizemodule_exec(PyObject *m) | |
| { | |
| tokenize_state *state = get_tokenize_state(m); | |
| if (state == NULL) { | |
| return -1; | |
| } | |
| state->TokenizerIter = (PyTypeObject *)PyType_FromModuleAndSpec(m, &tokenizeriter_spec, NULL); | |
| if (state->TokenizerIter == NULL) { | |
| return -1; | |
| } | |
| if (PyModule_AddType(m, state->TokenizerIter) < 0) { | |
| return -1; | |
| } | |
| return 0; | |
| } | |
/* No module-level functions; the module only exposes the TokenizerIter type. */
static PyMethodDef tokenize_methods[] = {
    {NULL, NULL, 0, NULL} /* Sentinel */
};
/* Multi-phase initialization slots: run tokenizemodule_exec() when the
   module object is being populated. */
static PyModuleDef_Slot tokenizemodule_slots[] = {
    {Py_mod_exec, tokenizemodule_exec},
    {0, NULL}
};
/* GC traversal: visit the heap type held in the module state so the
   collector can see the module -> type reference. */
static int
tokenizemodule_traverse(PyObject *m, visitproc visit, void *arg)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_VISIT(state->TokenizerIter);
    return 0;
}
/* GC clear: drop the module's strong reference to the TokenizerIter type
   (Py_CLEAR sets the slot to NULL before DECREF, avoiding re-entrancy
   issues during collection). */
static int
tokenizemodule_clear(PyObject *m)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_CLEAR(state->TokenizerIter);
    return 0;
}
/* m_free hook: releasing the state's references is all that is needed,
   so simply delegate to the clear function. */
static void
tokenizemodule_free(void *m)
{
    tokenizemodule_clear((PyObject *)m);
}
/* Definition of the private _tokenize extension module. m_size > 0 gives
   each module instance its own tokenize_state; m_slots enables
   multi-phase initialization. */
static struct PyModuleDef _tokenizemodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "_tokenize",
    .m_size = sizeof(tokenize_state),
    .m_slots = tokenizemodule_slots,
    .m_methods = tokenize_methods,
    .m_traverse = tokenizemodule_traverse,
    .m_clear = tokenizemodule_clear,
    .m_free = tokenizemodule_free,
};
/* Module entry point: with multi-phase init, just hand the definition to
   the import machinery; actual setup happens in tokenizemodule_exec(). */
PyMODINIT_FUNC
PyInit__tokenize(void)
{
    return PyModuleDef_Init(&_tokenizemodule);
}