#include <iostream>#include <stdio.h>#include <vector>#include <string>#include <algorithm>#include <unordered_map>#include <set>using namespace std;/*问题:All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.For example,Given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT",Return:["AAAAACCCCC", "CCCCCAAAAA"].分析:nucleotides:核苷酸,abbreviated:简短的.此题需要寻找出长度为10的重复出现的子串。重复子串问题可以用后缀数组,对后缀数组排序,然后,寻找长度为10的后缀串,判断相邻两个是字符串中是否包含另一个字符串,如果包含,返回但是发现不行,后缀数组,用于找到最长重复子串用commLen。但是发现答案提供的是那种子串不能连在一起的。如果我每次设定起点和终点,然后遍历后续是否出现即可,设定起点和终点时间复杂度为O(n),每次寻找时间复杂度为O(n)输入:AAAAACCCCCAAAAACCCCCCAAAAAGGGTTTAAAAACCCCCAAAAA输出:AAAAACCCCC CCCCCAAAAAno result报错:Memory Limit Exceeded,内存超出限制。因为后缀数组的时间复杂度为O(n^3)学习一下KMP算法关键:1 leecode解法:https://discuss.leetcode.com/topic/27517/7-lines-simple-java-o-n从位置0开始,每次截取10个字符,压入到set中,如果重复就压入真正重复的集合中。避免字符串搜索,本质上是把字符串搜索变为set的比较去重2压入的时候仍然用set,可能存在重复压入*/class Solution {public: vector<string> findRepeatedDnaSequences(string s) { vector<string> result; if(s.empty() || s.size() <= 10) { return result; } //产生后缀数组 int len = s.length(); unordered_map<string ,bool> visited; string str; set<string> setResult; for(int i = 0 ; i + 9 < len ; i++) { str = s.substr(i ,10); //重复,说明是所要求的结果 if(visited.find(str) != visited.end()) { //可能存在重复压入 setResult.insert(str); } else { visited[str] = true; } } for(set<string>::iterator it = setResult.begin() ; it != setResult.end() ; it++) { result.push_back(*it); } return result; }};void PRint(vector<string>& result){ if(result.empty()) { cout << "no result" << endl; return; } int size = result.size(); for(int i = 0 ; i < size ; i++) { cout << result.at(i) << " " ; } cout << endl;}void process(){ string value; Solution solution; vector<string> result; while(cin >> value ) { result = solution.findRepeatedDnaSequences(value); print(result); }}int main(int argc , char* argv[]){ process(); getchar(); return 0;}
新闻热点
疑难解答