Start doing character removal properly

This makes actual basic editing work, including things like justify-paragraph, so lines get justified by the number of UTF-8 characters rather than bytes. There is probably a ton of broken stuff left, but this actually seems to get the basics working right.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-07-11 10:43:16 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-07-11 10:43:16 -0700
commit: 0a8b429059b47bca13307967ff0dabf0b79fb1d5 (patch)
tree: 666fe3cd8e0651297080a94bec12a100e526febb
parent: 0e9fc2be15b0926dfee08846c906cd3b2668878a (diff)
download: uemacs-0a8b429059b47bca13307967ff0dabf0b79fb1d5.tar.gz
6 files changed, 48 insertions, 16 deletions
diff --git a/eval.c b/eval.c
index ca96ca2..fd4993b 100644
--- a/eval.c
+++ b/eval.c
@@ -588,7 +588,7 @@ int svar(struct variable_description *var, char *value)
 			lastkey = atoi(value);
 			break;
 		case EVCURCHAR:
-			ldelete(1L, FALSE);	/* delete 1 char */
+			ldelchar(1, FALSE);	/* delete 1 char */
 			c = atoi(value);
 			if (c == '\n')
 				lnewline();
diff --git a/line.c b/line.c
index 6e46ffc..172c9cd 100644
--- a/line.c
+++ b/line.c
@@ -268,7 +268,7 @@ int lowrite(int c)
 	if (curwp->w_doto < curwp->w_dotp->l_used &&
 	    (lgetc(curwp->w_dotp, curwp->w_doto) != '\t' ||
 	     ((curwp->w_doto) & tabmask) == tabmask))
-		ldelete(1L, FALSE);
+		ldelchar(1, FALSE);
 	return linsert(1, c);
 }
 
@@ -357,6 +357,30 @@ int lnewline(void)
 	return TRUE;
 }
 
+int lgetchar(unicode_t *c)	/* fetch the character at dot into *c */
+{
+	int len = llength(curwp->w_dotp);	/* llength() of the line holding dot */
+	char *buf = curwp->w_dotp->l_text;	/* that line's text buffer */
+	return utf8_to_unicode(buf, curwp->w_doto, len, c);	/* callers pass this result to ldelete() as a byte count */
+}
+
+/*
+ * ldelete() really fundamentally works on bytes, not characters.
+ * It is used for things like "scan 5 words forwards, and remove
+ * the bytes we scanned".
+ *
+ * If you want to delete characters, use ldelchar().
+ */
+int ldelchar(long n, int kflag)	/* delete n characters at dot; kflag is handed to ldelete() unchanged */
+{
+	while (n-- > 0) {	/* one pass per character to remove */
+		unicode_t c;	/* filled in by lgetchar(); the value itself is not used here */
+		if (!ldelete(lgetchar(&c), kflag))	/* lgetchar()'s return value is the byte count to delete */
+			return FALSE;	/* give up as soon as one ldelete() fails */
+	}
+	return TRUE;	/* every ldelete() call succeeded */
+}
+
 /*
  * This function deletes "n" bytes, starting at dot. It understands how do deal
  * with end of lines, etc. It returns TRUE if all of the characters were
@@ -655,7 +679,7 @@ int yank(int f, int n)
 					if (lnewline() == FALSE)
 						return FALSE;
 				} else {
-					if (linsert(1, c) == FALSE)
+					if (linsert_byte(1, c) == FALSE)
 						return FALSE;
 				}
 			}
diff --git a/line.h b/line.h
index 3900dbe..9eaad61 100644
--- a/line.h
+++ b/line.h
@@ -1,6 +1,8 @@
 #ifndef LINE_H_
 #define LINE_H_
 
+#include "utf8.h"
+
 /*
  * All text is kept in circularly linked lists of "struct line" structures. These
  * begin at the header line (which is the blank line beyond the end of the
@@ -32,6 +34,8 @@ extern int lowrite(int c);
 extern int lover(char *ostr);
 extern int lnewline(void);
 extern int ldelete(long n, int kflag);
+extern int ldelchar(long n, int kflag);
+extern int lgetchar(unicode_t *);
 extern char *getctext(void);
 extern int putctext(char *iline);
 extern int ldelnewline(void);
diff --git a/main.c b/main.c
index a6dabf6..2cf7462 100644
--- a/main.c
+++ b/main.c
@@ -521,7 +521,7 @@ int execute(int c, int f, int n)
 		    curwp->w_doto < curwp->w_dotp->l_used &&
 		    (lgetc(curwp->w_dotp, curwp->w_doto) != '\t' ||
 		     (curwp->w_doto) % 8 == 7))
-			ldelete(1L, FALSE);
+			ldelchar(1, FALSE);
 
 		/* do the appropriate insertion */
 		if (c == '}' && (curbp->b_mode & MDCMOD) != 0)
diff --git a/random.c b/random.c
index 52acd45..240e807 100644
--- a/random.c
+++ b/random.c
@@ -278,7 +278,7 @@ int detab(int f, int n)
 		while (curwp->w_doto < llength(curwp->w_dotp)) {
 			/* if we have a tab */
 			if (lgetc(curwp->w_dotp, curwp->w_doto) == '\t') {
-				ldelete(1L, FALSE);
+				ldelchar(1, FALSE);
 				insspace(TRUE,
 					 (tabmask + 1) -
 					 (curwp->w_doto & tabmask));
@@ -758,7 +758,7 @@ int forwdel(int f, int n)
 			kdelete();
 		thisflag |= CFKILL;
 	}
-	return ldelete((long) n, f);
+	return ldelchar((long) n, f);
 }
 
 /*
@@ -781,7 +781,7 @@ int backdel(int f, int n)
 		thisflag |= CFKILL;
 	}
 	if ((s = backchar(f, n)) == TRUE)
-		s = ldelete((long) n, f);
+		s = ldelchar(n, f);
 	return s;
 }
 
diff --git a/word.c b/word.c
index 4b6fead..83cfe9b 100644
--- a/word.c
+++ b/word.c
@@ -363,7 +363,7 @@ int delbword(int f, int n)
 	}
 	if (forwchar(FALSE, 1) == FALSE)
 		return FALSE;
-      bckdel:return ldelete(size, TRUE);
+      bckdel:return ldelchar(size, TRUE);
 }
 
 /*
@@ -399,7 +399,8 @@ int inword(void)
  */
 int fillpara(int f, int n)
 {
-	int c;		/* current char durring scan    */
+	unicode_t c;		/* current char during scan    */
+	unicode_t wbuf[NSTRING];/* buffer for current word      */
 	int wordlen;	/* length of current word       */
 	int clength;	/* position on line during fill */
 	int i;		/* index during word copy       */
@@ -408,7 +409,6 @@ int fillpara(int f, int n)
 	int firstflag;	/* first word? (needs no space) */
 	struct line *eopline;	/* pointer to line just past EOP */
 	int dotflag;	/* was the last char a period?  */
-	char wbuf[NSTRING];	/* buffer for current word      */
 
 	if (curbp->b_mode & MDVIEW)	/* don't allow this command if      */
 		return rdonly();	/* we are in read only mode     */
@@ -438,16 +438,18 @@ int fillpara(int f, int n)
 	firstflag = TRUE;
 	eopflag = FALSE;
 	while (!eopflag) {
+		int bytes = 1;
+
 		/* get the next character in the paragraph */
 		if (curwp->w_doto == llength(curwp->w_dotp)) {
 			c = ' ';
 			if (lforw(curwp->w_dotp) == eopline)
 				eopflag = TRUE;
 		} else
-			c = lgetc(curwp->w_dotp, curwp->w_doto);
+			bytes = lgetchar(&c);
 
 		/* and then delete it */
-		ldelete(1L, FALSE);
+		ldelete(bytes, FALSE);
 
 		/* if not a separator, just add it in */
 		if (c != ' ' && c != '\t') {
@@ -496,7 +498,8 @@ int fillpara(int f, int n)
  */
 int justpara(int f, int n)
 {
-	int c;		/* current char durring scan    */
+	unicode_t c;		/* current char during scan    */
+	unicode_t wbuf[NSTRING];/* buffer for current word      */
 	int wordlen;	/* length of current word       */
 	int clength;	/* position on line during fill */
 	int i;		/* index during word copy       */
@@ -504,7 +507,6 @@ int justpara(int f, int n)
 	int eopflag;	/* Are we at the End-Of-Paragraph? */
 	int firstflag;	/* first word? (needs no space) */
 	struct line *eopline;	/* pointer to line just past EOP */
-	char wbuf[NSTRING];	/* buffer for current word      */
 	int leftmarg;		/* left marginal */
 
 	if (curbp->b_mode & MDVIEW)	/* don't allow this command if      */
@@ -542,16 +544,18 @@ int justpara(int f, int n)
 	firstflag = TRUE;
 	eopflag = FALSE;
 	while (!eopflag) {
+		int bytes = 1;
+
 		/* get the next character in the paragraph */
 		if (curwp->w_doto == llength(curwp->w_dotp)) {
 			c = ' ';
 			if (lforw(curwp->w_dotp) == eopline)
 				eopflag = TRUE;
 		} else
-			c = lgetc(curwp->w_dotp, curwp->w_doto);
+			bytes = lgetchar(&c);
 
 		/* and then delete it */
-		ldelete(1L, FALSE);
+		ldelete(bytes, FALSE);
 
 		/* if not a separator, just add it in */
 		if (c != ' ' && c != '\t') {
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-11 10:43:16 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-11 10:43:16 -0700
commit	0a8b429059b47bca13307967ff0dabf0b79fb1d5 (patch)
tree	666fe3cd8e0651297080a94bec12a100e526febb
parent	0e9fc2be15b0926dfee08846c906cd3b2668878a (diff)
download	uemacs-0a8b429059b47bca13307967ff0dabf0b79fb1d5.tar.gz