C语言-第六章-字符串

字符串和文本的应用

C语言使用char类型的数组元素存储字符串。

1 什么是字符串

字符串是双引号之间的任意字符序列,可以包括转义字符、空格、换行等。字符串在内存中以空字符'\0'结束。

1
2
3
4
5
6
7
#include <stdio.h>

/* Demonstrates that an embedded '\0' ends a string as far as printf is concerned. */
int main(void)
{
    /* The literal holds more text, but output stops at the first '\0',
       so only "The character " is displayed. */
    const char *text = "The character \0 is used for terminate a string.";

    printf("%s", text);
    return 0;
}

2 处理字符串和文本的方法

1
char saying[20];

这条语句可以存储一个至多包含19个字符的字符串。

用const将字符串声明为常量:

1
const char message[]="The end of the world is nigh.";

2.1 确定字符串长度

1
2
3
4
5
6
7
8
9
10
11
#include <stdio.h>

/* Counts the characters of a string by scanning for its terminator. */
int main(void)
{
    const char message[] = "uruir.";
    int length;

    /* '\0' has the numeric value 0, so the scan stops at the terminator. */
    for (length = 0; message[length] != '\0'; length++)
        ;

    printf("%d", length);
    return 0;
}

2.2 用strlen确定长度

1
2
3
4
5
6
7
8
#include <stdio.h>
#include <string.h>

/* Prints the length of a string as reported by strlen(). */
int main(void)
{
    const char message[] = "uruir.";

    /* strlen() is declared in <string.h> and returns size_t, which has to be
       printed with %zu; %d expects an int. */
    printf("%zu", strlen(message));
    return 0;
}

3 字符串操作

将字符串赋值给另一字符串,不能像int那样用”=”,而要用专门函数来将字符一个个传入。

3.1 连接字符串

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#include <stdio.h>
#include <string.h>

/* Appends s2 to s1 character by character after checking that the result fits in s1. */
int main(void)
{
    char s1[50] = "to be or not to be";
    char s2[] = ", that is the question.";
    size_t i = strlen(s1); /* strlen() returns size_t, so keep the lengths unsigned */
    size_t j = strlen(s2);

    if (i + j + 1 > sizeof s1) { /* +1 leaves room for the terminator */
        printf("can't put a quart into a pint pot.");
    } else {
        j = 0;
        while (s2[j])
            s1[i++] = s2[j++]; /* s1[i] starts at the old '\0'; the first character of s2 overwrites it */
        s1[i] = '\0'; /* the loop stops before copying the terminator, so add it here */
        printf("%s", s1);
    }
    return 0;
}

上面的复制循环(while(s2[j]) s1[i++] = s2[j++];)连同其后添加结束符的那一行(s1[i]='\0';),可用下列一行代替:

while(s1[i++] = s2[j++]);  

因为括号内是一个赋值表达式,它的值就是刚刚复制过去的字符:只要这个字符不是'\0',条件就为真,继续复制;当s2的'\0'也被复制过去之后,条件为假,循环结束。由于'\0'已经被复制,不需要再单独添加结束符。

3.2 字符串数组

1
2
3
4
5
6
7
8
9
10
11
12
13
#include <stdio.h>

/* Prints every string stored in a two-dimensional char array, one per line. */
int main(void)
{
    char sayings[2][32] = {
        "My name is uruir.",
        "What's your name?"
    };
    int row = 0;

    /* Each row of the array is itself a NUL-terminated string. */
    while (row < 2) {
        printf("%s\n", sayings[row]);
        row++;
    }
    return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#include <stdio.h>

/* Joins the second row of a 2D char array onto the first row, if it fits. */
int main(void)
{
    char say[][50] = {
        "to be or not to be",
        ", that is the question."
    };
    size_t count[] = {0, 0}; /* length of each row's string */

    /* Measure both strings by scanning for their terminators. */
    for (int i = 0; i < 2; i++)
        while (say[i][count[i]])
            count[i]++;

    if (count[0] + count[1] + 1 > sizeof say[0]) { /* +1 for the terminator */
        printf("can't put a quart into a pint pot.");
    } else {
        count[1] = 0;
        /* The assignment copies one character and yields it as the loop
           condition, so the loop ends right after the '\0' has been copied.
           No extra terminator is written afterwards: count[0] is then one
           past the '\0', which is outside the row when the result fills it
           exactly. */
        while ((say[0][count[0]++] = say[1][count[1]++]))
            ;
        printf("%s", say[0]);
    }
    return 0;
}

4 字符串库函数

字符串函数在<string.h>头文件中

4.1 使用库函数复制字符串

1
2
/* Copy only when string1 occupies at least as many bytes as string2.
   NOTE(review): sizeof yields the capacity only if string1 and string2 are
   arrays rather than pointers -- confirm at their declarations. */
if(sizeof(string2) <= sizeof(string1))
strcpy(string1, string2); // copies string2 into string1

复制前n个字符:

1
2
3
4
5
6
7
8
9
10
11
#include <stdio.h>
#include <string.h>

/* Overwrites the first n characters of destination with the start of source. */
int main(void)
{
    char destination[] = "This string will be replaced";
    char source[] = "This string will be copied in part";
    size_t n = 26;

    /* strncpy() appends no '\0' when source has n or more characters.
       n is smaller than sizeof(destination), so the tail of the original
       text and its terminator stay in place. */
    strncpy(destination, source, n);

    /* sizeof yields size_t, which is printed with %zu. */
    printf("%zu, %s", sizeof(destination), destination);
    return 0;
}

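strncpy最多从source复制n个字符到destination。如果source的前n个字符中没有'\0'(即strlen(source) >= n),strncpy不会在destination的末尾补上'\0';上例能正常显示,只是因为destination原有的结尾和'\0'没有被覆盖。所以n不能大于sizeof(destination),并且通常应在复制后手动写入destination[sizeof(destination)-1] = '\0'。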

4.2 使用库函数确定字符串的长度

#include <stddef.h> // provides size_t; NOTE(review): strlen itself is declared in <string.h>, which this fragment does not include

size_t count = 0; // receives the string length
count = strlen(string); // number of characters before the terminating '\0'

这个计数不包括’\0’,而sizeof(string)包括。

4.3 使用库函数连接字符串

strcat(str1, str2); //copy str2 to the end of str1
1
2
3
4
5
6
7
8
9
10
11
#include <stdio.h>
#include <stddef.h>
#include <string.h>

/* Appends source to destination with strcat() and prints the joined length and text. */
int main(void)
{
    /* strcat() writes past the current end of destination, so the array needs
       room for both strings plus the '\0' (30 + 27 + 1 = 58 bytes). An array
       declared with [] would only be as large as its initializer and would
       overflow. */
    char destination[64] = "This string will be replaced, ";
    char source[] = "This string will be copied.";

    strcat(destination, source);

    /* Prints 57 followed by the joined text. sizeof(destination) would give
       the array capacity (64 here), not the string length. */
    printf("%zu, %s", strlen(destination), destination);
    return 0;
}

如果用sizeof(destination)代替strlen(destination),得到的就不是字符串长度57,而是数组destination占用的总字节数(包括'\0'以及尚未使用的空间)。

4.4 比较字符串

strcmp(string1, string2);   //以ASCII码确定大小
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#include <stdio.h>
#include <stddef.h>
#include <string.h>

/* Reads two words and reports whether they are equal or which one sorts first. */
int main(void)
{
    char destination[20];
    char source[20];

    printf("输入第一个字符串:\n");
    /* An array name already converts to the address of its first element, so
       no '&' is needed. %19s leaves room for the '\0' in the 20-byte array. */
    if (scanf("%19s", destination) != 1)
        return 1; /* nothing was read, so the array holds no valid string */

    printf("第二个\n");
    if (scanf("%19s", source) != 1) /* same as above: array name, no '&' */
        return 1;

    /* strcmp() compares by character code; call it once and reuse the result. */
    int order = strcmp(destination, source);

    if (order == 0)
        printf("两串一样。");
    else
        printf("%s 比 %s 小。",
               order < 0 ? destination : source,  /* the smaller string */
               order < 0 ? source : destination); /* the larger string */
    return 0;
}

4.5 搜索字符串

4.5.1 指针的概念

指针是变量类型,它是含有地址的变量。

1
2
int number = 99; // number stores the value 99
int *pnumber = &number; // pnumber stores the address of number, i.e. it points to number

number这个变量里存放的是值99;&number是number在内存中的地址。pnumber是一个指针变量,用"int *"定义,它存放的就是number的地址,所以说pnumber指向number。

1
2
3
4
5
6
7
8
9
10
#include <stdio.h>

/* Prints the address of an int twice: via the address-of operator and via a pointer. */
int main(void)
{
    int i = 333;
    int *p = &i; /* p holds the address of i */

    /* %p expects a void * argument. Passing i or *p would pass the int
       value 333 itself, not an address. */
    printf("%p\n", (void *)&i); /* address where the value 333 is stored */
    printf("%p", (void *)p);    /* same address: the one kept in p */
    return 0;
}

比如,假设我的内存是4GB,那就有约4,000,000,000个byte。那么”0000,0000”就指向第一个byte,”ffff,ffff”指向最后一byte。再假设”int i”时,系统给”i”分配的是第15号空间,即”0000,000f”,因为int占4个字节,所以”0000,000f”,”0000,0010”,”0000,0011”和”0000,0012”这4个字节里存放的就是333这个int常量。因为p指向i,所以p里存放”0000,000f”这么一个地址。当系统通过p来使用333时,先看p,即”0000,000f”,就转向地址为”0000,000f”的内存空间。*是取消引用运算符,其作用是访问指针指定的地址中存储的数据。

4.5.2 在字符串中查找子字符串
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#include <stdio.h>
#include <string.h>

/* Searches a text for two substrings with strstr() and reports where each was found. */
int main(void)
{
    char text[] = "Every dog has his day";
    char str1[] = "dog";
    char str2[] = "Dog";

    /* strstr() returns a pointer to the first match inside text, or NULL.
       The search is case-sensitive. */
    char *p1 = strstr(text, str1);
    char *p2 = strstr(text, str2);

    if (p1 == NULL)
        printf("%s was not found.\n", str1);
    else /* prints the start of text and the position of the match inside it */
        printf("%s was found in \"%s.\", %p, %p\n", str1, text, (void *)text, (void *)p1);

    if (p2 == NULL)
        printf("%s was not found.\n", str2);
    else
        printf("%s was found in \"%s.\", %p, %p\n", str2, text, (void *)text, (void *)p2);

    return 0;
}

5 分析和转换字符串

5.1 用gets读入字符串

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#include <stdio.h>
#include <ctype.h>

/* Reads one line of text and counts the letters and digits it contains. */
int main(void)
{
    char buffer[80];     /* Input buffer */
    int num_letters = 0; /* Number of letters in input */
    int num_digits = 0;  /* Number of digits in input */

    printf("\nEnter an interesting string of less than 80 characters:\n");

    /* gets() cannot limit how much it writes and was removed in C11.
       fgets() also reads spaces up to the end of the line, but never stores
       more than sizeof buffer bytes including the '\0'. */
    if (fgets(buffer, sizeof buffer, stdin) == NULL)
        buffer[0] = '\0'; /* no input: treat as an empty string */

    for (int i = 0; buffer[i] != '\0'; i++)
    {
        /* <ctype.h> functions require a value representable as unsigned char. */
        unsigned char ch = (unsigned char)buffer[i];

        if (isalpha(ch))
            num_letters++; /* Increment letter count */
        if (isdigit(ch))
            num_digits++;  /* Increment digit count */
    }
    printf("\nYour string contained %d letters and %d digits.\n",
           num_letters, num_digits);
    return 0;
}

使用fgets函数可以确保写入安全。fgets(buffer, sizeof(buffer), stdin)从stdin最多读入sizeof(buffer)-1个字符,并自动在末尾加上'\0',因此不会写到buffer之外。与gets不同,fgets会保留行尾的换行符;如果不需要,可以用buffer[strcspn(buffer, "\n")] = '\0'把它去掉。

5.2 将字符串转换成数值

atof()  //字符串参数转换成double类型的值
atoi()  //int
atol()  //long
atoll() //long long
1
2
3
4
5
#include <stdlib.h> // declares atof and the other ato* conversion functions

char value_str[] = "98.4"; // text to be converted
double value = 0;
value = atof(value_str); // converts the text in value_str to a double

6 小程序

6.1 问题

从键盘读入任意长度的一段文本,确定该文本中每个单词出现的频率(忽略大小写)。

6.2 分析

  • 使用fgets()
  • 文本包含标点,删除之
  • 用空格来区分单词,将单词存入另一个数组中,小写保存

6.3 解决方案

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>

#define TEXTLEN 10000 /* Maximum length of text */
#define BUFFERSIZE 100 /* Input buffer size */
#define MAXWORDS 500 /* Maximum number of different words */
#define WORDLEN 15 /* Maximum word length */

/*
 * Reads lines of text until a line holding only an asterisk (or end of
 * input), then prints every distinct word with the number of times it
 * occurs. Words are compared case-insensitively; they consist of letters
 * and single quotes only. Returns 1 when a capacity limit is exceeded.
 */
int main(void)
{
    char text[TEXTLEN+1] = ""; /* Starts empty so appending is well defined */
    size_t textlen = 0; /* Current length of text */
    char buffer[BUFFERSIZE];
    const char endstr[] = "*\n"; /* Signals end of input */

    const char space = ' ';
    const char quote = '\'';

    char words[MAXWORDS][WORDLEN+1];
    int nword[MAXWORDS]; /* Number of word occurrences */
    char word[WORDLEN+1]; /* Stores a single word */
    int wordlen = 0; /* Length of a word */
    int wordcount = 0; /* Number of words stored */

    printf("Enter text on an arbitrary number of lines.");
    printf("\nEnter a line containing just an asterisk to end input:\n\n");

    /* Read an arbitrary number of lines of text. fgets() returns NULL at */
    /* end of input or on error, which also ends the loop. */
    while(fgets(buffer, BUFFERSIZE, stdin) != NULL)
    {
        /* A string containing an asterisk followed by newline */
        /* signals end of input */
        if(strcmp(buffer, endstr) == 0)
            break;

        /* Check if we have space for latest input */
        size_t buflen = strlen(buffer);
        if(textlen + buflen + 1 > TEXTLEN)
        {
            printf("Maximum capacity for text exceeded. Terminating program.");
            return 1;
        }
        strcpy(text + textlen, buffer); /* Append at the known end of text */
        textlen += buflen;
    }

    /* Replace everything except alpha and single quote characters by spaces. */
    /* Digits are replaced as well: the word scanner below accepts only */
    /* letters and quotes, and would never advance past a digit. */
    for(size_t i = 0 ; i < textlen ; i++)
    {
        if(text[i] != quote && !isalpha((unsigned char)text[i]))
            text[i] = space;
    }

    /* Find unique words and store in words array */
    size_t index = 0;
    while(true)
    {
        /* Ignore any leading spaces before a word */
        while(text[index] == space)
            ++index;

        /* If we are at the end of text, we are done */
        if(text[index] == '\0')
            break;

        /* Extract a word */
        wordlen = 0; /* Reset word length */
        while(text[index] == quote || isalpha((unsigned char)text[index]))
        {
            /* Check if word is too long */
            if(wordlen == WORDLEN)
            {
                printf("Maximum word length exceeded. Terminating program.");
                return 1;
            }
            /* Copy as lowercase */
            word[wordlen++] = (char)tolower((unsigned char)text[index++]);
        }
        word[wordlen] = '\0'; /* Add string terminator */

        /* Check for word already stored */
        bool isnew = true;
        for(int i = 0 ; i < wordcount ; i++)
        {
            if(strcmp(word, words[i]) == 0)
            {
                ++nword[i];
                isnew = false;
                break;
            }
        }

        if(isnew)
        {
            /* Check if we have space for another word */
            if(wordcount >= MAXWORDS)
            {
                printf("\n Maximum word count exceeded. Terminating program.");
                return 1;
            }

            strcpy(words[wordcount], word); /* Store the new word */
            nword[wordcount++] = 1; /* Set its count to 1 */
        }
    }

    /* Output the words and frequencies */
    for(int i = 0 ; i < wordcount ; i++)
    {
        if( !(i%3) ) /* Three words to a line */
            printf("\n");
        printf(" %-15s%5d", words[i], nword[i]);
    }

    return 0;
}

7 练习

7.1 数值以英文显示

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#include <stdio.h>
#include <string.h>

/*
 * Reads an integer, clamps it to the range 1..999 and prints it spelled
 * out in English words.
 */
int main(void)
{
    char *ones[] = {"zero", "one","two","three","four","five","six","seven","eight","nine"};
    char *teens[] = {"ten", "eleven","twelve","thirteen","fourteen","fifteen","sixteen","seventeen","eighteen","nineteen"};
    char *tens[] = {"error", "error","twenty","thirty","forty","fifty","sixty","seventy","eighty","ninety"};
    char hundred_word[] = " hundred";
    char and_word[] = " and ";
    char value_str[50] = "";   /* Result text, built up by appending */
    int value = 0;             /* Integer to be converted */
    int digits[] = {0,0,0};    /* Units, tens and hundreds digits, in that order */
    int pos;

    printf("Enter a positive integer less than 1000: ");
    scanf("%d",&value);

    /* Force the value into the supported range. */
    if(value < 1)
        value = 1;
    else if(value >= 1000)
        value = 999;

    /* Peel off the decimal digits, least significant first. */
    for(pos = 0 ; value > 0 ; value /= 10)
        digits[pos++] = value%10;

    const int units_digit = digits[0];
    const int tens_digit = digits[1];
    const int hundreds_digit = digits[2];

    /* Hundreds part, joined with " and " when anything follows it. */
    if(hundreds_digit > 0)
    {
        strcat(value_str, ones[hundreds_digit]);
        strcat(value_str, hundred_word);
        if(tens_digit > 0 || units_digit > 0)
            strcat(value_str, and_word);
    }

    /* Tens and units part. 10..19 have their own names. */
    if(tens_digit == 1)
    {
        strcat(value_str, teens[units_digit]);
    }
    else if(tens_digit > 1)
    {
        strcat(value_str, tens[tens_digit]);
        if(units_digit > 0)
        {
            strcat(value_str, " ");
            strcat(value_str, ones[units_digit]);
        }
    }
    else if(units_digit > 0)
    {
        strcat(value_str, ones[units_digit]);
    }

    printf("\n%s\n", value_str);
    return 0;
}

7.2 一行显示一个单词

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#include <stdio.h>
#include <string.h>

/*
 * Reads one line holding words separated by commas and/or spaces and
 * prints each word on its own line. Words longer than the storage allows
 * are truncated; words beyond the table capacity are not stored.
 */
int main(void)
{
    char list[5000];     /* Stores the list of comma separated words */
    char words[500][20]; /* Array for 500 words of up to 19 characters plus '\0' */
    const char comma = ',';
    const char space = ' ';
    const int max_words = (int)(sizeof words / sizeof words[0]);
    const int max_length = (int)sizeof words[0] - 1; /* Leave room for '\0' */
    int word_count = 0;  /* Number of words found */
    int word_length = 0; /* Current word length */
    int index = 0;       /* Character position */

    printf("Enter a comma separated list of words:\n");

    /* gets() cannot limit how much it writes and was removed in C11. */
    /* fgets() never stores more than sizeof list bytes including '\0'. */
    if(fgets(list, sizeof list, stdin) == NULL)
        list[0] = '\0';               /* No input: treat as an empty list */
    list[strcspn(list, "\n")] = '\0'; /* Drop the newline that fgets() keeps */

    /* Find words in list */
    while(list[index] != '\0')
    {
        /* Skip over spaces and commas */
        while(list[index] == space || list[index] == comma)
            ++index;

        /* Trailing separators are not followed by a word */
        if(list[index] == '\0')
            break;

        /* Stop before writing past the end of the words array */
        if(word_count == max_words)
        {
            printf("\nToo many words. Only the first %d are listed.\n", max_words);
            break;
        }

        /* Copy characters that are not space, comma or \0 as part of a word. */
        /* Characters that do not fit are read past but not stored. */
        while(list[index] != space && list[index] != comma && list[index] != '\0')
        {
            if(word_length < max_length)
                words[word_count][word_length++] = list[index];
            ++index;
        }

        words[word_count++][word_length] = '\0'; /* Append terminator */
        word_length = 0;                         /* Reset ready for next word */
    }

    /* List the words that were found */
    printf("\nThe words in the list are:\n");
    for(index = 0 ; index < word_count ; index++)
        printf("%s\n", words[index]);

    return 0;
}

7.3 随机显示一段文本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#include <stdio.h>
#include <time.h>
#include <stdlib.h>

/* Prints one saying chosen pseudo-randomly from a fixed table. */
int main(void)
{
    char thoughts[][50] = {
        "Wherever you go, there you are!",
        "A nod is as good as a wink to a blind horse.",
        "Many hands make light work.",
        "Too many cooks spoil the broth.",
        "A rolling stone gathers no moss.",
        "A wise man will cover the hole in his carpet."
    };
    /* Number of rows: total size divided by the size of one row. */
    const size_t thought_count = sizeof thoughts / sizeof thoughts[0];

    /* Seed from the clock so different runs can pick different sayings. */
    srand((unsigned int)time(NULL));

    const size_t pick = rand() % thought_count;
    printf("Today's thought is:\n%s\n", thoughts[pick]);
    return 0;
}

7.4 回文

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#include <stdio.h>
#include <string.h>
#include <ctype.h>

/*
 * Reads a sentence and reports whether it is a palindrome, ignoring case
 * and everything that is not a letter.
 */
int main(void)
{
    char sentence[500];       /* Stores the sentence to be tested */
    char sentence_chars[500]; /* Stores the sentence without punctuation and spaces */
    size_t length = 0;        /* Number of letters kept */

    printf("Enter a sentence to be tested:\n");

    /* gets() cannot limit how much it writes and was removed in C11. */
    /* fgets() never stores more than sizeof sentence bytes including '\0'. */
    /* The newline it keeps is not a letter and is dropped below. */
    if(fgets(sentence, sizeof sentence, stdin) == NULL)
        sentence[0] = '\0'; /* No input: treat as an empty sentence */

    /* Copy only letters as lowercase. The result is never longer than */
    /* sentence, so it fits in the equally sized sentence_chars. */
    for(size_t i = 0 ; sentence[i] != '\0' ; i++)
    {
        /* <ctype.h> functions require a value representable as unsigned char */
        unsigned char ch = (unsigned char)sentence[i];

        if(isalpha(ch))
            sentence_chars[length++] = (char)tolower(ch);
    }
    sentence_chars[length] = '\0'; /* Append string terminator */

    /* Compare matching characters from both ends of the string */
    /* If any pair are not the same, then it's not a palindrome */
    for(size_t i = 0 ; i < length/2 ; i++)
    {
        if(sentence_chars[i] != sentence_chars[length-1-i])
        {
            printf("\n The sentence you entered is not a palindrome.\n");
            return 0;
        }
    }

    /* If we arrive here all matching pairs of characters are equal */
    printf("\n The sentence you entered is a palindrome.\n");
    return 0;
}